Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1 | /* |
| 2 | * kmp_barrier.cpp |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 3 | */ |
| 4 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 5 | //===----------------------------------------------------------------------===// |
| 6 | // |
Chandler Carruth | b7aa2c7 | 2019-01-19 10:56:40 +0000 | [diff] [blame] | 7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 8 | // See https://llvm.org/LICENSE.txt for license information. |
| 9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 13 | #include "kmp_wait_release.h" |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 14 | #include "kmp_barrier.h" |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 15 | #include "kmp_itt.h" |
Jonathan Peyton | ea766a2 | 2015-10-08 18:23:38 +0000 | [diff] [blame] | 16 | #include "kmp_os.h" |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 17 | #include "kmp_stats.h" |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 18 | #include "ompt-specific.h" |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 19 | // for distributed barrier |
| 20 | #include "kmp_affinity.h" |
Jonathan Peyton | ea766a2 | 2015-10-08 18:23:38 +0000 | [diff] [blame] | 21 | |
// Helper macros used when copying ICVs / go data between threads.
//
// On KMP_MIC builds USE_NGO_STORES is enabled and the macros map to 512-bit
// intrinsics. NOTE: in that variant ngo_load() declares a local variable named
// Vt, and ngo_store_icvs()/ngo_store_go() store that Vt and ignore their own
// `src` argument, so ngo_load() must appear earlier in the same scope.
//
// In all other builds ngo_load()/ngo_sync() expand to no-ops,
// ngo_store_icvs() calls copy_icvs(), and ngo_store_go() copies CACHE_LINE
// bytes with KMP_MEMCPY.
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 22 | #if KMP_MIC |
| 23 | #include <immintrin.h> |
| 24 | #define USE_NGO_STORES 1 |
| 25 | #endif // KMP_MIC |
| 26 | |
| 27 | #if KMP_MIC && USE_NGO_STORES |
| 28 | // ICV copying |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 29 | #define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 30 | #define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 31 | #define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) |
| 32 | #define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory") |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 33 | #else |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 34 | #define ngo_load(src) ((void)0) |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 35 | #define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 36 | #define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) |
| 37 | #define ngo_sync() ((void)0) |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 38 | #endif /* KMP_MIC && USE_NGO_STORES */ |
| 39 | |
// Declared here so it can be referenced before its definition, which is not
// part of this section of the file.
| 40 | void __kmp_print_structure(void); // Forward declaration |
| 41 | |
| 42 | // ---------------------------- Barrier Algorithms ---------------------------- |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 43 | // Distributed barrier |
| 44 | |
| 45 | // Compute how many threads to have polling each cache-line. |
| 46 | // We want to limit the number of writes to IDEAL_GO_RESOLUTION. |
| 47 | void distributedBarrier::computeVarsForN(size_t n) { |
| 48 | int nsockets = 1; |
| 49 | if (__kmp_topology) { |
| 50 | int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET); |
| 51 | int core_level = __kmp_topology->get_level(KMP_HW_CORE); |
| 52 | int ncores_per_socket = |
| 53 | __kmp_topology->calculate_ratio(core_level, socket_level); |
| 54 | nsockets = __kmp_topology->get_count(socket_level); |
| 55 | |
| 56 | if (nsockets <= 0) |
| 57 | nsockets = 1; |
| 58 | if (ncores_per_socket <= 0) |
| 59 | ncores_per_socket = 1; |
| 60 | |
| 61 | threads_per_go = ncores_per_socket >> 1; |
| 62 | if (!fix_threads_per_go) { |
| 63 | // Minimize num_gos |
| 64 | if (threads_per_go > 4) { |
| 65 | if (KMP_OPTIMIZE_FOR_REDUCTIONS) { |
| 66 | threads_per_go = threads_per_go >> 1; |
| 67 | } |
| 68 | if (threads_per_go > 4 && nsockets == 1) |
| 69 | threads_per_go = threads_per_go >> 1; |
| 70 | } |
| 71 | } |
| 72 | if (threads_per_go == 0) |
| 73 | threads_per_go = 1; |
| 74 | fix_threads_per_go = true; |
| 75 | num_gos = n / threads_per_go; |
| 76 | if (n % threads_per_go) |
| 77 | num_gos++; |
| 78 | if (nsockets == 1 || num_gos == 1) |
| 79 | num_groups = 1; |
| 80 | else { |
| 81 | num_groups = num_gos / nsockets; |
| 82 | if (num_gos % nsockets) |
| 83 | num_groups++; |
| 84 | } |
| 85 | if (num_groups <= 0) |
| 86 | num_groups = 1; |
| 87 | gos_per_group = num_gos / num_groups; |
| 88 | if (num_gos % num_groups) |
| 89 | gos_per_group++; |
| 90 | threads_per_group = threads_per_go * gos_per_group; |
| 91 | } else { |
| 92 | num_gos = n / threads_per_go; |
| 93 | if (n % threads_per_go) |
| 94 | num_gos++; |
| 95 | if (num_gos == 1) |
| 96 | num_groups = 1; |
| 97 | else { |
| 98 | num_groups = num_gos / 2; |
| 99 | if (num_gos % 2) |
| 100 | num_groups++; |
| 101 | } |
| 102 | gos_per_group = num_gos / num_groups; |
| 103 | if (num_gos % num_groups) |
| 104 | gos_per_group++; |
| 105 | threads_per_group = threads_per_go * gos_per_group; |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | void distributedBarrier::computeGo(size_t n) { |
| 110 | // Minimize num_gos |
| 111 | for (num_gos = 1;; num_gos++) |
| 112 | if (IDEAL_CONTENTION * num_gos >= n) |
| 113 | break; |
| 114 | threads_per_go = n / num_gos; |
| 115 | if (n % num_gos) |
| 116 | threads_per_go++; |
| 117 | while (num_gos > MAX_GOS) { |
| 118 | threads_per_go++; |
| 119 | num_gos = n / threads_per_go; |
| 120 | if (n % threads_per_go) |
| 121 | num_gos++; |
| 122 | } |
| 123 | computeVarsForN(n); |
| 124 | } |
| 125 | |
| 126 | // This function is to resize the barrier arrays when the new number of threads |
| 127 | // exceeds max_threads, which is the current size of all the arrays |
| 128 | void distributedBarrier::resize(size_t nthr) { |
| 129 | KMP_DEBUG_ASSERT(nthr > max_threads); |
| 130 | |
| 131 | // expand to requested size * 2 |
| 132 | max_threads = nthr * 2; |
| 133 | |
| 134 | // allocate arrays to new max threads |
| 135 | for (int i = 0; i < MAX_ITERS; ++i) { |
| 136 | if (flags[i]) |
| 137 | flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i], |
| 138 | max_threads * sizeof(flags_s)); |
| 139 | else |
| 140 | flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s)); |
| 141 | } |
| 142 | |
| 143 | if (go) |
| 144 | go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s)); |
| 145 | else |
| 146 | go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s)); |
| 147 | |
| 148 | if (iter) |
| 149 | iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s)); |
| 150 | else |
| 151 | iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s)); |
| 152 | |
| 153 | if (sleep) |
| 154 | sleep = |
| 155 | (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s)); |
| 156 | else |
| 157 | sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s)); |
| 158 | } |
| 159 | |
| 160 | // This function is to set all the go flags that threads might be waiting |
| 161 | // on, and when blocktime is not infinite, it should be followed by a wake-up |
| 162 | // call to each thread |
| 163 | kmp_uint64 distributedBarrier::go_release() { |
| 164 | kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS; |
| 165 | for (size_t j = 0; j < num_gos; j++) { |
| 166 | go[j].go.store(next_go); |
| 167 | } |
| 168 | return next_go; |
| 169 | } |
| 170 | |
| 171 | void distributedBarrier::go_reset() { |
| 172 | for (size_t j = 0; j < max_threads; ++j) { |
| 173 | for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) { |
| 174 | flags[i][j].stillNeed = 1; |
| 175 | } |
| 176 | go[j].go.store(0); |
| 177 | iter[j].iter = 0; |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | // This function inits/re-inits the distributed barrier for a particular number |
| 182 | // of threads. If a resize of arrays is needed, it calls the resize function. |
| 183 | void distributedBarrier::init(size_t nthr) { |
| 184 | size_t old_max = max_threads; |
| 185 | if (nthr > max_threads) { // need more space in arrays |
| 186 | resize(nthr); |
| 187 | } |
| 188 | |
| 189 | for (size_t i = 0; i < max_threads; i++) { |
| 190 | for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) { |
| 191 | flags[j][i].stillNeed = 1; |
| 192 | } |
| 193 | go[i].go.store(0); |
| 194 | iter[i].iter = 0; |
| 195 | if (i >= old_max) |
| 196 | sleep[i].sleep = false; |
| 197 | } |
| 198 | |
| 199 | // Recalculate num_gos, etc. based on new nthr |
| 200 | computeVarsForN(nthr); |
| 201 | |
| 202 | num_threads = nthr; |
| 203 | |
| 204 | if (team_icvs == NULL) |
| 205 | team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t)); |
| 206 | } |
| 207 | |
| 208 | // This function is used only when KMP_BLOCKTIME is not infinite. |
| 209 | // static |
| 210 | void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, |
| 211 | size_t start, size_t stop, size_t inc, |
| 212 | size_t tid) { |
| 213 | KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME); |
| 214 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 215 | return; |
| 216 | |
| 217 | kmp_info_t **other_threads = team->t.t_threads; |
| 218 | for (size_t thr = start; thr < stop; thr += inc) { |
| 219 | KMP_DEBUG_ASSERT(other_threads[thr]); |
| 220 | int gtid = other_threads[thr]->th.th_info.ds.ds_gtid; |
| 221 | // Wake up worker regardless of if it appears to be sleeping or not |
| 222 | __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL); |
| 223 | } |
| 224 | } |
| 225 | |
// Gather (arrival) phase of the distributed barrier for thread `tid`.
//
// A thread is a group leader when tid % threads_per_group == 0. A non-leader
// only publishes its arrival: it re-arms its stillNeed flag for the next
// iteration and clears the one for the current iteration. A group leader
//   1. polls the current-iteration stillNeed flags of the other threads in
//      its group until none remain set,
//   2. if `reduce` is non-NULL, folds each group member's reduce_data into
//      its own,
//   3. publishes its own arrival the same way a non-leader does, and
//   4. polls the current-iteration flags of all group leaders until none
//      remain set.
// Afterwards tid 0 (when `reduce` is non-NULL) folds the reduce_data of the
// other group leaders into its own.
//
// While polling, the thread may execute tasks from its task team. Both
// polling loops are abandoned once __kmp_global.g.g_done is set.
//
// Parameters:
//   bt           barrier type; only used for tracing in this function
//   this_thr     the calling thread's descriptor
//   gtid, tid    global id and team-local id of the calling thread
//   reduce       reduction callback (destination, source), or NULL
//   itt_sync_obj forwarded to the task-execution call
| 226 | static void __kmp_dist_barrier_gather( |
| 227 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 228 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 229 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather); |
| 230 | kmp_team_t *team; |
| 231 | distributedBarrier *b; |
| 232 | kmp_info_t **other_threads; |
| 233 | kmp_uint64 my_current_iter, my_next_iter; |
| 234 | kmp_uint32 nproc; |
| 235 | bool group_leader; |
| 236 | |
| 237 | team = this_thr->th.th_team; |
| 238 | nproc = this_thr->th.th_team_nproc; |
| 239 | other_threads = team->t.t_threads; |
| 240 | b = team->t.b; |
// The per-thread iteration counter selects which of the MAX_ITERS flag
// buffers is in use for this barrier instance.
| 241 | my_current_iter = b->iter[tid].iter; |
| 242 | my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS; |
| 243 | group_leader = ((tid % b->threads_per_group) == 0); |
| 244 | |
| 245 | KA_TRACE(20, |
| 246 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n", |
| 247 | gtid, team->t.t_id, tid, bt)); |
| 248 | |
| 249 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 250 | // Barrier imbalance - save arrive time to the thread |
| 251 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { |
| 252 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = |
| 253 | __itt_get_timestamp(); |
| 254 | } |
| 255 | #endif |
| 256 | |
| 257 | if (group_leader) { |
| 258 | // Start from the thread after the group leader |
| 259 | size_t group_start = tid + 1; |
| 260 | size_t group_end = tid + b->threads_per_group; |
| 261 | size_t threads_pending = 0; |
| 262 | |
// The last group may be partial; clamp it to the team size.
| 263 | if (group_end > nproc) |
| 264 | group_end = nproc; |
| 265 | do { // wait for threads in my group |
| 266 | threads_pending = 0; |
| 267 | // Check all the flags every time to avoid branch mispredict |
| 268 | for (size_t thr = group_start; thr < group_end; thr++) { |
| 269 | // Each thread uses a different cache line |
| 270 | threads_pending += b->flags[my_current_iter][thr].stillNeed; |
| 271 | } |
| 272 | // Execute tasks here |
| 273 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 274 | kmp_task_team_t *task_team = this_thr->th.th_task_team; |
| 275 | if (task_team != NULL) { |
| 276 | if (TCR_SYNC_4(task_team->tt.tt_active)) { |
| 277 | if (KMP_TASKING_ENABLED(task_team)) { |
| 278 | int tasks_completed = FALSE; |
| 279 | __kmp_atomic_execute_tasks_64( |
| 280 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE, |
| 281 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); |
| 282 | } else |
| 283 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
| 284 | } |
| 285 | } else { |
| 286 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
| 287 | } // if |
| 288 | } |
// Stop polling once the runtime is marked done (aborting the thread if an
// abort was requested); otherwise undo any SAFE_TO_REAP marking made above
// before polling again.
| 289 | if (TCR_4(__kmp_global.g.g_done)) { |
| 290 | if (__kmp_global.g.g_abort) |
| 291 | __kmp_abort_thread(); |
| 292 | break; |
| 293 | } else if (__kmp_tasking_mode != tskm_immediate_exec && |
| 294 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { |
| 295 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; |
| 296 | } |
| 297 | } while (threads_pending > 0); |
| 298 | |
| 299 | if (reduce) { // Perform reduction if needed |
| 300 | OMPT_REDUCTION_DECL(this_thr, gtid); |
| 301 | OMPT_REDUCTION_BEGIN; |
| 302 | // Group leader reduces all threads in group |
| 303 | for (size_t thr = group_start; thr < group_end; thr++) { |
| 304 | (*reduce)(this_thr->th.th_local.reduce_data, |
| 305 | other_threads[thr]->th.th_local.reduce_data); |
| 306 | } |
| 307 | OMPT_REDUCTION_END; |
| 308 | } |
| 309 | |
| 310 | // Set flag for next iteration |
| 311 | b->flags[my_next_iter][tid].stillNeed = 1; |
| 312 | // Each thread uses a different cache line; resets stillNeed to 0 to |
| 313 | // indicate it has reached the barrier |
| 314 | b->flags[my_current_iter][tid].stillNeed = 0; |
| 315 | |
// Second polling loop: same structure as the first, but over the flags of
// the group leaders (every threads_per_group-th tid, this thread included).
| 316 | do { // wait for all group leaders |
| 317 | threads_pending = 0; |
| 318 | for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) { |
| 319 | threads_pending += b->flags[my_current_iter][thr].stillNeed; |
| 320 | } |
| 321 | // Execute tasks here |
| 322 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 323 | kmp_task_team_t *task_team = this_thr->th.th_task_team; |
| 324 | if (task_team != NULL) { |
| 325 | if (TCR_SYNC_4(task_team->tt.tt_active)) { |
| 326 | if (KMP_TASKING_ENABLED(task_team)) { |
| 327 | int tasks_completed = FALSE; |
| 328 | __kmp_atomic_execute_tasks_64( |
| 329 | this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE, |
| 330 | &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); |
| 331 | } else |
| 332 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
| 333 | } |
| 334 | } else { |
| 335 | this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; |
| 336 | } // if |
| 337 | } |
| 338 | if (TCR_4(__kmp_global.g.g_done)) { |
| 339 | if (__kmp_global.g.g_abort) |
| 340 | __kmp_abort_thread(); |
| 341 | break; |
| 342 | } else if (__kmp_tasking_mode != tskm_immediate_exec && |
| 343 | this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { |
| 344 | this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; |
| 345 | } |
| 346 | } while (threads_pending > 0); |
| 347 | |
| 348 | if (reduce) { // Perform reduction if needed |
| 349 | if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders |
| 350 | OMPT_REDUCTION_DECL(this_thr, gtid); |
| 351 | OMPT_REDUCTION_BEGIN; |
| 352 | for (size_t thr = b->threads_per_group; thr < nproc; |
| 353 | thr += b->threads_per_group) { |
| 354 | (*reduce)(this_thr->th.th_local.reduce_data, |
| 355 | other_threads[thr]->th.th_local.reduce_data); |
| 356 | } |
| 357 | OMPT_REDUCTION_END; |
| 358 | } |
| 359 | } |
| 360 | } else { |
| 361 | // Set flag for next iteration |
| 362 | b->flags[my_next_iter][tid].stillNeed = 1; |
| 363 | // Each thread uses a different cache line; resets stillNeed to 0 to |
| 364 | // indicate it has reached the barrier |
| 365 | b->flags[my_current_iter][tid].stillNeed = 0; |
| 366 | } |
| 367 | |
// Full memory fence after the flag updates above.
| 368 | KMP_MFENCE(); |
| 369 | |
| 370 | KA_TRACE(20, |
| 371 | ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", |
| 372 | gtid, team->t.t_id, tid, bt)); |
| 373 | } |
| 374 | |
// Release phase of the distributed barrier for thread `tid`.
//
// Primary thread (tid 0): computes next_go = its iteration counter +
// MAX_ITERS, stores it into the first go flag of every group, wakes the other
// group leaders when blocktime is finite, then stores it into the remaining
// go flags of its own group and wakes the other threads of its own group.
//
// Every other thread loops until it is marked as used in a team
// (th_used_in_team == 1) and the go flag it maps to (tid / threads_per_go)
// holds next_go. tid, team and the barrier pointer are re-read on each pass
// because the thread may have been moved to a different team meanwhile. A
// group leader then forwards next_go to the remaining go flags of its group
// and, when blocktime is finite, wakes the other threads of its group.
//
// With KMP_BARRIER_ICV_PUSH and propagate_icvs set, a worker copies the ICVs
// from the barrier's team_icvs into its implicit task and th_fixed_icvs; the
// primary copies its implicit-task ICVs into th_fixed_icvs.
//
// On normal exit the thread's iteration counter is advanced modulo MAX_ITERS.
// The early returns taken when a fork/join barrier observes g_done skip that
// update.
| 375 | static void __kmp_dist_barrier_release( |
| 376 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 377 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 378 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release); |
| 379 | kmp_team_t *team; |
| 380 | distributedBarrier *b; |
| 381 | kmp_bstate_t *thr_bar; |
| 382 | kmp_uint64 my_current_iter, next_go; |
| 383 | size_t my_go_index; |
| 384 | bool group_leader; |
| 385 | |
| 386 | KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n", |
| 387 | gtid, tid, bt)); |
| 388 | |
| 389 | thr_bar = &this_thr->th.th_bar[bt].bb; |
| 390 | |
| 391 | if (!KMP_MASTER_TID(tid)) { |
| 392 | // workers and non-master group leaders need to check their presence in team |
// The loop below is left only through the `break` near its end (after team,
// b, my_current_iter, next_go and my_go_index have been assigned) or through
// one of the g_done returns.
| 393 | do { |
| 394 | if (this_thr->th.th_used_in_team.load() != 1 && |
| 395 | this_thr->th.th_used_in_team.load() != 3) { |
| 396 | // Thread is not in use in a team. Wait on location in tid's thread |
| 397 | // struct. The 0 value tells anyone looking that this thread is spinning |
| 398 | // or sleeping until this location becomes 3 again; 3 is the transition |
| 399 | // state to get to 1 which is waiting on go and being in the team |
| 400 | kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3); |
// Move 2 -> 0 if the state is currently 2; wait only if that succeeded or
// the state already was 0.
| 401 | if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2, |
| 402 | 0) || |
| 403 | this_thr->th.th_used_in_team.load() == 0) { |
| 404 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 405 | } |
| 406 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 407 | if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { |
| 408 | // In fork barrier where we could not get the object reliably |
| 409 | itt_sync_obj = |
| 410 | __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); |
| 411 | // Cancel wait on previous parallel region... |
| 412 | __kmp_itt_task_starting(itt_sync_obj); |
| 413 | |
| 414 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 415 | return; |
| 416 | |
| 417 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 418 | if (itt_sync_obj != NULL) |
| 419 | // Call prepare as early as possible for "new" barrier |
| 420 | __kmp_itt_task_finished(itt_sync_obj); |
| 421 | } else |
| 422 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
// NOTE: with ITT enabled, the `if` below is the body of the `else` above;
// without ITT it is a plain statement.
| 423 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 424 | return; |
| 425 | } |
| 426 | if (this_thr->th.th_used_in_team.load() != 1 && |
| 427 | this_thr->th.th_used_in_team.load() != 3) // spurious wake-up? |
| 428 | continue; |
| 429 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 430 | return; |
| 431 | |
| 432 | // At this point, the thread thinks it is in use in a team, or in |
| 433 | // transition to be used in a team, but it might have reached this barrier |
| 434 | // before it was marked unused by the team. Unused threads are awoken and |
| 435 | // shifted to wait on local thread struct elsewhere. It also might reach |
| 436 | // this point by being picked up for use by a different team. Either way, |
| 437 | // we need to update the tid. |
| 438 | tid = __kmp_tid_from_gtid(gtid); |
| 439 | team = this_thr->th.th_team; |
| 440 | KMP_DEBUG_ASSERT(tid >= 0); |
| 441 | KMP_DEBUG_ASSERT(team); |
| 442 | b = team->t.b; |
| 443 | my_current_iter = b->iter[tid].iter; |
| 444 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; |
| 445 | my_go_index = tid / b->threads_per_go; |
// Complete the 3 -> 1 transition; the result of the compare-and-store is
// deliberately ignored.
| 446 | if (this_thr->th.th_used_in_team.load() == 3) { |
Hansang Bae | e7c9f44 | 2024-08-15 10:42:22 -0500 | [diff] [blame^] | 447 | (void)KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, |
| 448 | 1); |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 449 | } |
| 450 | // Check if go flag is set |
| 451 | if (b->go[my_go_index].go.load() != next_go) { |
| 452 | // Wait on go flag on team |
| 453 | kmp_atomic_flag_64<false, true> my_flag( |
| 454 | &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep)); |
| 455 | my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 456 | KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter || |
| 457 | b->iter[tid].iter == 0); |
| 458 | KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false); |
| 459 | } |
| 460 | |
| 461 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 462 | return; |
| 463 | // At this point, the thread's go location was set. This means the primary |
| 464 | // thread is safely in the barrier, and so this thread's data is |
| 465 | // up-to-date, but we should check again that this thread is really in |
| 466 | // use in the team, as it could have been woken up for the purpose of |
| 467 | // changing team size, or reaping threads at shutdown. |
| 468 | if (this_thr->th.th_used_in_team.load() == 1) |
| 469 | break; |
| 470 | } while (1); |
| 471 | |
| 472 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 473 | return; |
| 474 | |
| 475 | group_leader = ((tid % b->threads_per_group) == 0); |
| 476 | if (group_leader) { |
| 477 | // Tell all the threads in my group they can go! |
// NOTE(review): the upper bound is not clamped to num_gos, so for a partial
// last group this may store into go entries past num_gos; presumably those
// entries exist because the go array is sized by max_threads - confirm.
| 478 | for (size_t go_idx = my_go_index + 1; |
| 479 | go_idx < my_go_index + b->gos_per_group; go_idx++) { |
| 480 | b->go[go_idx].go.store(next_go); |
| 481 | } |
| 482 | // Fence added so that workers can see changes to go. sfence inadequate. |
| 483 | KMP_MFENCE(); |
| 484 | } |
| 485 | |
| 486 | #if KMP_BARRIER_ICV_PUSH |
| 487 | if (propagate_icvs) { // copy ICVs to final dest |
| 488 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, |
| 489 | tid, FALSE); |
| 490 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 491 | (kmp_internal_control_t *)team->t.b->team_icvs); |
| 492 | copy_icvs(&thr_bar->th_fixed_icvs, |
| 493 | &team->t.t_implicit_task_taskdata[tid].td_icvs); |
| 494 | } |
| 495 | #endif |
| 496 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) { |
| 497 | // This thread is now awake and participating in the barrier; |
| 498 | // wake up the other threads in the group |
| 499 | size_t nproc = this_thr->th.th_team_nproc; |
| 500 | size_t group_end = tid + b->threads_per_group; |
| 501 | if (nproc < group_end) |
| 502 | group_end = nproc; |
| 503 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); |
| 504 | } |
| 505 | } else { // Primary thread |
| 506 | team = this_thr->th.th_team; |
| 507 | b = team->t.b; |
| 508 | my_current_iter = b->iter[tid].iter; |
| 509 | next_go = my_current_iter + distributedBarrier::MAX_ITERS; |
| 510 | #if KMP_BARRIER_ICV_PUSH |
| 511 | if (propagate_icvs) { |
| 512 | // primary thread has ICVs in final destination; copy |
| 513 | copy_icvs(&thr_bar->th_fixed_icvs, |
| 514 | &team->t.t_implicit_task_taskdata[tid].td_icvs); |
| 515 | } |
| 516 | #endif |
| 517 | // Tell all the group leaders they can go! |
// Only the first go flag of each group is written here; the group leaders
// forward the value to the rest of their group in the worker branch above.
| 518 | for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) { |
| 519 | b->go[go_idx].go.store(next_go); |
| 520 | } |
| 521 | |
| 522 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
| 523 | // Wake-up the group leaders |
| 524 | size_t nproc = this_thr->th.th_team_nproc; |
| 525 | __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc, |
| 526 | b->threads_per_group, tid); |
| 527 | } |
| 528 | |
| 529 | // Tell all the threads in my group they can go! |
| 530 | for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) { |
| 531 | b->go[go_idx].go.store(next_go); |
| 532 | } |
| 533 | |
| 534 | // Fence added so that workers can see changes to go. sfence inadequate. |
| 535 | KMP_MFENCE(); |
| 536 | |
| 537 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
| 538 | // Wake-up the other threads in my group |
| 539 | size_t nproc = this_thr->th.th_team_nproc; |
| 540 | size_t group_end = tid + b->threads_per_group; |
| 541 | if (nproc < group_end) |
| 542 | group_end = nproc; |
| 543 | __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); |
| 544 | } |
| 545 | } |
| 546 | // Update to next iteration |
// The iteration counter must not have changed since it was sampled above.
| 547 | KMP_ASSERT(my_current_iter == b->iter[tid].iter); |
| 548 | b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS; |
| 549 | |
| 550 | KA_TRACE( |
| 551 | 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", |
| 552 | gtid, team->t.t_id, tid, bt)); |
| 553 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 554 | |
| 555 | // Linear Barrier |
// Gather (arrival) phase of the linear barrier.
//
// A worker (tid != 0) signals its arrival by releasing a flag built on its
// own b_arrived counter. The primary thread waits, in tid order, for each
// worker's b_arrived to reach new_state (the team's b_arrived plus
// KMP_BARRIER_STATE_BUMP), applies `reduce` to that worker's reduce_data
// right after the worker has arrived, and finally records new_state in the
// team's b_arrived.
//
// Template parameter `cancellable`: when true, the primary waits with a
// kmp_flag_64<true, false> and this function returns true as soon as one of
// those waits returns true (presumably meaning the wait was cancelled -
// confirm against kmp_wait_release.h). In every other case it returns false.
//
// Parameters:
//   bt           barrier type; selects the th_bar[] / t_bar[] entry used
//   this_thr     the calling thread's descriptor
//   gtid, tid    global id and team-local id of the calling thread
//   reduce       reduction callback (destination, source), or NULL
//   itt_sync_obj forwarded to flag.wait()
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 556 | template <bool cancellable = false> |
| 557 | static bool __kmp_linear_barrier_gather_template( |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 558 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 559 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 560 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 561 | kmp_team_t *team = this_thr->th.th_team; |
| 562 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 563 | kmp_info_t **other_threads = team->t.t_threads; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 564 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 565 | KA_TRACE( |
| 566 | 20, |
| 567 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", |
| 568 | gtid, team->t.t_id, tid, bt)); |
| 569 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 570 | |
| 571 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 572 | // Barrier imbalance - save arrive time to the thread |
| 573 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { |
| 574 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = |
| 575 | __itt_get_timestamp(); |
| 576 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 577 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 578 | // We now perform a linear reduction to signal that all of the threads have |
| 579 | // arrived. |
| 580 | if (!KMP_MASTER_TID(tid)) { |
// NOTE(review): the two adjacent string literals below concatenate without a
// separating space, so the trace prints "...T#%d(%d:%d)arrived(%p)...".
| 581 | KA_TRACE(20, |
| 582 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" |
| 583 | "arrived(%p): %llu => %llu\n", |
| 584 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team), |
| 585 | team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived, |
| 586 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 587 | // Mark arrival to primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 588 | /* After performing this write, a worker thread may not assume that the team |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 589 | is valid any more - it could be deallocated by the primary thread at any |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 590 | time. */ |
// The flag wraps this worker's own b_arrived; thread 0 is passed as the
// second constructor argument (presumably the thread to notify - confirm).
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 591 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 592 | flag.release(); |
| 593 | } else { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 594 | kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; |
| 595 | int nproc = this_thr->th.th_team_nproc; |
| 596 | int i; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 597 | // Don't have to worry about sleep bit here or atomic since team setting |
// Value every worker's b_arrived must reach for this barrier instance.
Jonathan Peyton | c2f9edc | 2017-10-20 19:30:57 +0000 | [diff] [blame] | 598 | kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 599 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 600 | // Collect all the worker team member threads. |
| 601 | for (i = 1; i < nproc; ++i) { |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 602 | #if KMP_CACHE_MANAGE |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 603 | // Prefetch next thread's arrived count |
| 604 | if (i + 1 < nproc) |
| 605 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 606 | #endif /* KMP_CACHE_MANAGE */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 607 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " |
| 608 | "arrived(%p) == %llu\n", |
| 609 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), |
| 610 | team->t.t_id, i, |
| 611 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 612 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 613 | // Wait for worker thread to arrive |
// On the cancellable path an early `return true` leaves the team's b_arrived
// unchanged and skips the remaining workers.
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 614 | if (cancellable) { |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 615 | kmp_flag_64<true, false> flag( |
| 616 | &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); |
| 617 | if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj))) |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 618 | return true; |
| 619 | } else { |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 620 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, |
| 621 | new_state); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 622 | flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 623 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 624 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 625 | // Barrier imbalance - write min of the thread time and the other thread |
| 626 | // time to the thread. |
| 627 | if (__kmp_forkjoin_frames_mode == 2) { |
| 628 | this_thr->th.th_bar_min_time = KMP_MIN( |
| 629 | this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time); |
| 630 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 631 | #endif |
// Fold worker i's reduce_data into the primary's, right after it arrived.
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 632 | if (reduce) { |
| 633 | KA_TRACE(100, |
| 634 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", |
| 635 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), |
| 636 | team->t.t_id, i)); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 637 | OMPT_REDUCTION_DECL(this_thr, gtid); |
| 638 | OMPT_REDUCTION_BEGIN; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 639 | (*reduce)(this_thr->th.th_local.reduce_data, |
| 640 | other_threads[i]->th.th_local.reduce_data); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 641 | OMPT_REDUCTION_END; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 642 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 643 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 644 | // Don't have to worry about sleep bit here or atomic since team setting |
| 645 | team_bar->b_arrived = new_state; |
| 646 | KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " |
| 647 | "arrived(%p) = %llu\n", |
| 648 | gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, |
| 649 | new_state)); |
| 650 | } |
| 651 | KA_TRACE( |
| 652 | 20, |
| 653 | ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", |
| 654 | gtid, team->t.t_id, tid, bt)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 655 | return false; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 656 | } |
| 657 | |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 658 | template <bool cancellable = false> |
| 659 | static bool __kmp_linear_barrier_release_template( |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 660 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 661 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 662 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 663 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 664 | kmp_team_t *team; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 665 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 666 | if (KMP_MASTER_TID(tid)) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 667 | unsigned int i; |
| 668 | kmp_uint32 nproc = this_thr->th.th_team_nproc; |
| 669 | kmp_info_t **other_threads; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 670 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 671 | team = __kmp_threads[gtid]->th.th_team; |
| 672 | KMP_DEBUG_ASSERT(team != NULL); |
| 673 | other_threads = team->t.t_threads; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 674 | |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 675 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for " |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 676 | "barrier type %d\n", |
| 677 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 678 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 679 | if (nproc > 1) { |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 680 | #if KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 681 | { |
| 682 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); |
| 683 | if (propagate_icvs) { |
| 684 | ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); |
| 685 | for (i = 1; i < nproc; ++i) { |
| 686 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], |
| 687 | team, i, FALSE); |
| 688 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, |
| 689 | &team->t.t_implicit_task_taskdata[0].td_icvs); |
| 690 | } |
| 691 | ngo_sync(); |
| 692 | } |
| 693 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 694 | #endif // KMP_BARRIER_ICV_PUSH |
| 695 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 696 | // Now, release all of the worker threads |
| 697 | for (i = 1; i < nproc; ++i) { |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 698 | #if KMP_CACHE_MANAGE |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 699 | // Prefetch next thread's go flag |
| 700 | if (i + 1 < nproc) |
| 701 | KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 702 | #endif /* KMP_CACHE_MANAGE */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 703 | KA_TRACE( |
| 704 | 20, |
| 705 | ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " |
| 706 | "go(%p): %u => %u\n", |
| 707 | gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid, |
| 708 | team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go, |
| 709 | other_threads[i]->th.th_bar[bt].bb.b_go, |
| 710 | other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 711 | kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go, |
Shilei Tian | d0678fa | 2021-02-20 12:46:04 -0500 | [diff] [blame] | 712 | other_threads[i]); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 713 | flag.release(); |
| 714 | } |
| 715 | } |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 716 | } else { // Wait for the PRIMARY thread to release us |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 717 | KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", |
| 718 | gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 719 | if (cancellable) { |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 720 | kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); |
| 721 | if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj))) |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 722 | return true; |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 723 | } else { |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 724 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 725 | flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 726 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 727 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 728 | if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { |
| 729 | // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is |
| 730 | // disabled) |
| 731 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); |
| 732 | // Cancel wait on previous parallel region... |
| 733 | __kmp_itt_task_starting(itt_sync_obj); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 734 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 735 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 736 | return false; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 737 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 738 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 739 | if (itt_sync_obj != NULL) |
| 740 | // Call prepare as early as possible for "new" barrier |
| 741 | __kmp_itt_task_finished(itt_sync_obj); |
| 742 | } else |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 743 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
| 744 | // Early exit for reaping threads releasing forkjoin barrier |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 745 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 746 | return false; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 747 | // The worker thread may now assume that the team is valid. |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 748 | #ifdef KMP_DEBUG |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 749 | tid = __kmp_tid_from_gtid(gtid); |
| 750 | team = __kmp_threads[gtid]->th.th_team; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 751 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 752 | KMP_DEBUG_ASSERT(team != NULL); |
| 753 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); |
| 754 | KA_TRACE(20, |
| 755 | ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", |
| 756 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); |
| 757 | KMP_MB(); // Flush all pending memory write invalidates. |
| 758 | } |
| 759 | KA_TRACE( |
| 760 | 20, |
| 761 | ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", |
| 762 | gtid, team->t.t_id, tid, bt)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 763 | return false; |
| 764 | } |
| 765 | |
| 766 | static void __kmp_linear_barrier_gather( |
| 767 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 768 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 769 | __kmp_linear_barrier_gather_template<false>( |
| 770 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 771 | } |
| 772 | |
| 773 | static bool __kmp_linear_barrier_gather_cancellable( |
| 774 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 775 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 776 | return __kmp_linear_barrier_gather_template<true>( |
| 777 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 778 | } |
| 779 | |
| 780 | static void __kmp_linear_barrier_release( |
| 781 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 782 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 783 | __kmp_linear_barrier_release_template<false>( |
| 784 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 785 | } |
| 786 | |
| 787 | static bool __kmp_linear_barrier_release_cancellable( |
| 788 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 789 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 790 | return __kmp_linear_barrier_release_template<true>( |
| 791 | bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 792 | } |
| 793 | |
| 794 | // Tree barrier |
Shilei Tian | d0678fa | 2021-02-20 12:46:04 -0500 | [diff] [blame] | 795 | static void __kmp_tree_barrier_gather( |
| 796 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 797 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 798 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 799 | kmp_team_t *team = this_thr->th.th_team; |
| 800 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 801 | kmp_info_t **other_threads = team->t.t_threads; |
| 802 | kmp_uint32 nproc = this_thr->th.th_team_nproc; |
| 803 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; |
| 804 | kmp_uint32 branch_factor = 1 << branch_bits; |
| 805 | kmp_uint32 child; |
| 806 | kmp_uint32 child_tid; |
Hansang Bae | a3918ee | 2021-03-11 17:34:06 -0600 | [diff] [blame] | 807 | kmp_uint64 new_state = 0; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 808 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 809 | KA_TRACE( |
| 810 | 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", |
| 811 | gtid, team->t.t_id, tid, bt)); |
| 812 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 813 | |
| 814 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 815 | // Barrier imbalance - save arrive time to the thread |
| 816 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { |
| 817 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = |
| 818 | __itt_get_timestamp(); |
| 819 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 820 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 821 | // Perform tree gather to wait until all threads have arrived; reduce any |
| 822 | // required data as we go |
| 823 | child_tid = (tid << branch_bits) + 1; |
| 824 | if (child_tid < nproc) { |
| 825 | // Parent threads wait for all their children to arrive |
| 826 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; |
| 827 | child = 1; |
| 828 | do { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 829 | kmp_info_t *child_thr = other_threads[child_tid]; |
| 830 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 831 | #if KMP_CACHE_MANAGE |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 832 | // Prefetch next thread's arrived count |
| 833 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) |
| 834 | KMP_CACHE_PREFETCH( |
| 835 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 836 | #endif /* KMP_CACHE_MANAGE */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 837 | KA_TRACE(20, |
| 838 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " |
| 839 | "arrived(%p) == %llu\n", |
| 840 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 841 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); |
| 842 | // Wait for child to arrive |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 843 | kmp_flag_64<> flag(&child_bar->b_arrived, new_state); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 844 | flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 845 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 846 | // Barrier imbalance - write min of the thread time and a child time to |
| 847 | // the thread. |
| 848 | if (__kmp_forkjoin_frames_mode == 2) { |
| 849 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, |
| 850 | child_thr->th.th_bar_min_time); |
| 851 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 852 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 853 | if (reduce) { |
| 854 | KA_TRACE(100, |
| 855 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", |
| 856 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 857 | team->t.t_id, child_tid)); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 858 | OMPT_REDUCTION_DECL(this_thr, gtid); |
| 859 | OMPT_REDUCTION_BEGIN; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 860 | (*reduce)(this_thr->th.th_local.reduce_data, |
| 861 | child_thr->th.th_local.reduce_data); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 862 | OMPT_REDUCTION_END; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 863 | } |
| 864 | child++; |
| 865 | child_tid++; |
| 866 | } while (child <= branch_factor && child_tid < nproc); |
| 867 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 868 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 869 | if (!KMP_MASTER_TID(tid)) { // Worker threads |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 870 | kmp_int32 parent_tid = (tid - 1) >> branch_bits; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 871 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 872 | KA_TRACE(20, |
| 873 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " |
| 874 | "arrived(%p): %llu => %llu\n", |
| 875 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), |
| 876 | team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived, |
| 877 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 878 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 879 | // Mark arrival to parent thread |
| 880 | /* After performing this write, a worker thread may not assume that the team |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 881 | is valid any more - it could be deallocated by the primary thread at any |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 882 | time. */ |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 883 | kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 884 | flag.release(); |
| 885 | } else { |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 886 | // Need to update the team arrived pointer if we are the primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 887 | if (nproc > 1) // New value was already computed above |
| 888 | team->t.t_bar[bt].b_arrived = new_state; |
| 889 | else |
| 890 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; |
| 891 | KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " |
| 892 | "arrived(%p) = %llu\n", |
| 893 | gtid, team->t.t_id, tid, team->t.t_id, |
| 894 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); |
| 895 | } |
| 896 | KA_TRACE(20, |
| 897 | ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", |
| 898 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 899 | } |
| 900 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 901 | static void __kmp_tree_barrier_release( |
| 902 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 903 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 904 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 905 | kmp_team_t *team; |
| 906 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 907 | kmp_uint32 nproc; |
| 908 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; |
| 909 | kmp_uint32 branch_factor = 1 << branch_bits; |
| 910 | kmp_uint32 child; |
| 911 | kmp_uint32 child_tid; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 912 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 913 | // Perform a tree release for all of the threads that have been gathered |
| 914 | if (!KMP_MASTER_TID( |
| 915 | tid)) { // Handle fork barrier workers who aren't part of a team yet |
| 916 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid, |
| 917 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); |
| 918 | // Wait for parent thread to release us |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 919 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 920 | flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 921 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 922 | if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { |
| 923 | // In fork barrier where we could not get the object reliably (or |
| 924 | // ITTNOTIFY is disabled) |
| 925 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); |
| 926 | // Cancel wait on previous parallel region... |
| 927 | __kmp_itt_task_starting(itt_sync_obj); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 928 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 929 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 930 | return; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 931 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 932 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 933 | if (itt_sync_obj != NULL) |
| 934 | // Call prepare as early as possible for "new" barrier |
| 935 | __kmp_itt_task_finished(itt_sync_obj); |
| 936 | } else |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 937 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
| 938 | // Early exit for reaping threads releasing forkjoin barrier |
| 939 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 940 | return; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 941 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 942 | // The worker thread may now assume that the team is valid. |
| 943 | team = __kmp_threads[gtid]->th.th_team; |
| 944 | KMP_DEBUG_ASSERT(team != NULL); |
| 945 | tid = __kmp_tid_from_gtid(gtid); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 946 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 947 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); |
| 948 | KA_TRACE(20, |
| 949 | ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid, |
| 950 | team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); |
| 951 | KMP_MB(); // Flush all pending memory write invalidates. |
| 952 | } else { |
| 953 | team = __kmp_threads[gtid]->th.th_team; |
| 954 | KMP_DEBUG_ASSERT(team != NULL); |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 955 | KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for " |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 956 | "barrier type %d\n", |
| 957 | gtid, team->t.t_id, tid, bt)); |
| 958 | } |
| 959 | nproc = this_thr->th.th_team_nproc; |
| 960 | child_tid = (tid << branch_bits) + 1; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 961 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 962 | if (child_tid < nproc) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 963 | kmp_info_t **other_threads = team->t.t_threads; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 964 | child = 1; |
| 965 | // Parent threads release all their children |
| 966 | do { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 967 | kmp_info_t *child_thr = other_threads[child_tid]; |
| 968 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 969 | #if KMP_CACHE_MANAGE |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 970 | // Prefetch next thread's go count |
| 971 | if (child + 1 <= branch_factor && child_tid + 1 < nproc) |
| 972 | KMP_CACHE_PREFETCH( |
| 973 | &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 974 | #endif /* KMP_CACHE_MANAGE */ |
| 975 | |
| 976 | #if KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 977 | { |
| 978 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); |
| 979 | if (propagate_icvs) { |
| 980 | __kmp_init_implicit_task(team->t.t_ident, |
| 981 | team->t.t_threads[child_tid], team, |
| 982 | child_tid, FALSE); |
| 983 | copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, |
| 984 | &team->t.t_implicit_task_taskdata[0].td_icvs); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 985 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 986 | } |
| 987 | #endif // KMP_BARRIER_ICV_PUSH |
| 988 | KA_TRACE(20, |
| 989 | ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" |
| 990 | "go(%p): %u => %u\n", |
| 991 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 992 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, |
| 993 | child_bar->b_go + KMP_BARRIER_STATE_BUMP)); |
| 994 | // Release child from barrier |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 995 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 996 | flag.release(); |
| 997 | child++; |
| 998 | child_tid++; |
| 999 | } while (child <= branch_factor && child_tid < nproc); |
| 1000 | } |
| 1001 | KA_TRACE( |
| 1002 | 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", |
| 1003 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1004 | } |
| 1005 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1006 | // Hyper Barrier |
Shilei Tian | d0678fa | 2021-02-20 12:46:04 -0500 | [diff] [blame] | 1007 | static void __kmp_hyper_barrier_gather( |
| 1008 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 1009 | void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1010 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1011 | kmp_team_t *team = this_thr->th.th_team; |
| 1012 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 1013 | kmp_info_t **other_threads = team->t.t_threads; |
| 1014 | kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE; |
| 1015 | kmp_uint32 num_threads = this_thr->th.th_team_nproc; |
| 1016 | kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; |
| 1017 | kmp_uint32 branch_factor = 1 << branch_bits; |
| 1018 | kmp_uint32 offset; |
| 1019 | kmp_uint32 level; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1020 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1021 | KA_TRACE( |
| 1022 | 20, |
| 1023 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", |
| 1024 | gtid, team->t.t_id, tid, bt)); |
| 1025 | KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1026 | |
| 1027 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1028 | // Barrier imbalance - save arrive time to the thread |
| 1029 | if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { |
| 1030 | this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = |
| 1031 | __itt_get_timestamp(); |
| 1032 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1033 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1034 | /* Perform a hypercube-embedded tree gather to wait until all of the threads |
| 1035 | have arrived, and reduce any required data as we go. */ |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1036 | kmp_flag_64<> p_flag(&thr_bar->b_arrived); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1037 | for (level = 0, offset = 1; offset < num_threads; |
| 1038 | level += branch_bits, offset <<= branch_bits) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1039 | kmp_uint32 child; |
| 1040 | kmp_uint32 child_tid; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1041 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1042 | if (((tid >> level) & (branch_factor - 1)) != 0) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1043 | kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1044 | |
Bryan Chan | 8512533 | 2020-04-06 18:43:35 -0400 | [diff] [blame] | 1045 | KMP_MB(); // Synchronize parent and child threads. |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1046 | KA_TRACE(20, |
| 1047 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " |
| 1048 | "arrived(%p): %llu => %llu\n", |
| 1049 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), |
| 1050 | team->t.t_id, parent_tid, &thr_bar->b_arrived, |
| 1051 | thr_bar->b_arrived, |
| 1052 | thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); |
| 1053 | // Mark arrival to parent thread |
| 1054 | /* After performing this write (in the last iteration of the enclosing for |
| 1055 | loop), a worker thread may not assume that the team is valid any more |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1056 | - it could be deallocated by the primary thread at any time. */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1057 | p_flag.set_waiter(other_threads[parent_tid]); |
| 1058 | p_flag.release(); |
| 1059 | break; |
| 1060 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1061 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1062 | // Parent threads wait for children to arrive |
| 1063 | if (new_state == KMP_BARRIER_UNUSED_STATE) |
| 1064 | new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; |
| 1065 | for (child = 1, child_tid = tid + (1 << level); |
| 1066 | child < branch_factor && child_tid < num_threads; |
| 1067 | child++, child_tid += (1 << level)) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1068 | kmp_info_t *child_thr = other_threads[child_tid]; |
| 1069 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1070 | #if KMP_CACHE_MANAGE |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1071 | kmp_uint32 next_child_tid = child_tid + (1 << level); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1072 | // Prefetch next thread's arrived count |
| 1073 | if (child + 1 < branch_factor && next_child_tid < num_threads) |
| 1074 | KMP_CACHE_PREFETCH( |
| 1075 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1076 | #endif /* KMP_CACHE_MANAGE */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1077 | KA_TRACE(20, |
| 1078 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " |
| 1079 | "arrived(%p) == %llu\n", |
| 1080 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 1081 | team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); |
| 1082 | // Wait for child to arrive |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1083 | kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1084 | c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Bryan Chan | 8512533 | 2020-04-06 18:43:35 -0400 | [diff] [blame] | 1085 | KMP_MB(); // Synchronize parent and child threads. |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1086 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1087 | // Barrier imbalance - write min of the thread time and a child time to |
| 1088 | // the thread. |
| 1089 | if (__kmp_forkjoin_frames_mode == 2) { |
| 1090 | this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, |
| 1091 | child_thr->th.th_bar_min_time); |
| 1092 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1093 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1094 | if (reduce) { |
| 1095 | KA_TRACE(100, |
| 1096 | ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", |
| 1097 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 1098 | team->t.t_id, child_tid)); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 1099 | OMPT_REDUCTION_DECL(this_thr, gtid); |
| 1100 | OMPT_REDUCTION_BEGIN; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1101 | (*reduce)(this_thr->th.th_local.reduce_data, |
| 1102 | child_thr->th.th_local.reduce_data); |
protze@itc.rwth-aachen.de | 5210bae | 2019-12-27 14:39:50 +0100 | [diff] [blame] | 1103 | OMPT_REDUCTION_END; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1104 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1105 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1106 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1107 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1108 | if (KMP_MASTER_TID(tid)) { |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1109 | // Need to update the team arrived pointer if we are the primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1110 | if (new_state == KMP_BARRIER_UNUSED_STATE) |
| 1111 | team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; |
| 1112 | else |
| 1113 | team->t.t_bar[bt].b_arrived = new_state; |
| 1114 | KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " |
| 1115 | "arrived(%p) = %llu\n", |
| 1116 | gtid, team->t.t_id, tid, team->t.t_id, |
| 1117 | &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); |
| 1118 | } |
| 1119 | KA_TRACE( |
| 1120 | 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", |
| 1121 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1122 | } |
| 1123 | |
| 1124 | // The reverse versions seem to beat the forward versions overall |
| 1125 | #define KMP_REVERSE_HYPER_BAR |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1126 | static void __kmp_hyper_barrier_release( |
| 1127 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 1128 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 1129 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1130 | kmp_team_t *team; |
| 1131 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 1132 | kmp_info_t **other_threads; |
| 1133 | kmp_uint32 num_threads; |
| 1134 | kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; |
| 1135 | kmp_uint32 branch_factor = 1 << branch_bits; |
| 1136 | kmp_uint32 child; |
| 1137 | kmp_uint32 child_tid; |
| 1138 | kmp_uint32 offset; |
| 1139 | kmp_uint32 level; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1140 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1141 | /* Perform a hypercube-embedded tree release for all of the threads that have |
| 1142 | been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads |
| 1143 | are released in the reverse order of the corresponding gather, otherwise |
| 1144 | threads are released in the same order. */ |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1145 | if (KMP_MASTER_TID(tid)) { // primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1146 | team = __kmp_threads[gtid]->th.th_team; |
| 1147 | KMP_DEBUG_ASSERT(team != NULL); |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1148 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for " |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1149 | "barrier type %d\n", |
| 1150 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1151 | #if KMP_BARRIER_ICV_PUSH |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1152 | if (propagate_icvs) { // primary already has ICVs in final destination; copy |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1153 | copy_icvs(&thr_bar->th_fixed_icvs, |
| 1154 | &team->t.t_implicit_task_taskdata[tid].td_icvs); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1155 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1156 | #endif |
| 1157 | } else { // Handle fork barrier workers who aren't part of a team yet |
| 1158 | KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid, |
| 1159 | &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); |
| 1160 | // Wait for parent thread to release us |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1161 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1162 | flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1163 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1164 | if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { |
| 1165 | // In fork barrier where we could not get the object reliably |
| 1166 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); |
| 1167 | // Cancel wait on previous parallel region... |
| 1168 | __kmp_itt_task_starting(itt_sync_obj); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1169 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1170 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 1171 | return; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1172 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1173 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 1174 | if (itt_sync_obj != NULL) |
| 1175 | // Call prepare as early as possible for "new" barrier |
| 1176 | __kmp_itt_task_finished(itt_sync_obj); |
| 1177 | } else |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1178 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
| 1179 | // Early exit for reaping threads releasing forkjoin barrier |
| 1180 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1181 | return; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1182 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1183 | // The worker thread may now assume that the team is valid. |
| 1184 | team = __kmp_threads[gtid]->th.th_team; |
| 1185 | KMP_DEBUG_ASSERT(team != NULL); |
| 1186 | tid = __kmp_tid_from_gtid(gtid); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1187 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1188 | TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); |
| 1189 | KA_TRACE(20, |
| 1190 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", |
| 1191 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); |
| 1192 | KMP_MB(); // Flush all pending memory write invalidates. |
| 1193 | } |
| 1194 | num_threads = this_thr->th.th_team_nproc; |
| 1195 | other_threads = team->t.t_threads; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1196 | |
| 1197 | #ifdef KMP_REVERSE_HYPER_BAR |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1198 | // Count up to correct level for parent |
| 1199 | for (level = 0, offset = 1; |
| 1200 | offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0); |
| 1201 | level += branch_bits, offset <<= branch_bits) |
| 1202 | ; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1203 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1204 | // Now go down from there |
| 1205 | for (level -= branch_bits, offset >>= branch_bits; offset != 0; |
| 1206 | level -= branch_bits, offset >>= branch_bits) |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1207 | #else |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1208 | // Go down the tree, level by level |
| 1209 | for (level = 0, offset = 1; offset < num_threads; |
| 1210 | level += branch_bits, offset <<= branch_bits) |
| 1211 | #endif // KMP_REVERSE_HYPER_BAR |
| 1212 | { |
| 1213 | #ifdef KMP_REVERSE_HYPER_BAR |
| 1214 | /* Now go in reverse order through the children, highest to lowest. |
| 1215 | Initial setting of child is conservative here. */ |
| 1216 | child = num_threads >> ((level == 0) ? level : level - 1); |
| 1217 | for (child = (child < branch_factor - 1) ? child : branch_factor - 1, |
| 1218 | child_tid = tid + (child << level); |
| 1219 | child >= 1; child--, child_tid -= (1 << level)) |
| 1220 | #else |
| 1221 | if (((tid >> level) & (branch_factor - 1)) != 0) |
| 1222 | // No need to go lower than this, since this is the level parent would be |
| 1223 | // notified |
| 1224 | break; |
| 1225 | // Iterate through children on this level of the tree |
| 1226 | for (child = 1, child_tid = tid + (1 << level); |
| 1227 | child < branch_factor && child_tid < num_threads; |
| 1228 | child++, child_tid += (1 << level)) |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1229 | #endif // KMP_REVERSE_HYPER_BAR |
| 1230 | { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1231 | if (child_tid >= num_threads) |
| 1232 | continue; // Child doesn't exist so keep going |
| 1233 | else { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1234 | kmp_info_t *child_thr = other_threads[child_tid]; |
| 1235 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1236 | #if KMP_CACHE_MANAGE |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1237 | kmp_uint32 next_child_tid = child_tid - (1 << level); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1238 | // Prefetch next thread's go count |
| 1239 | #ifdef KMP_REVERSE_HYPER_BAR |
| 1240 | if (child - 1 >= 1 && next_child_tid < num_threads) |
| 1241 | #else |
| 1242 | if (child + 1 < branch_factor && next_child_tid < num_threads) |
| 1243 | #endif // KMP_REVERSE_HYPER_BAR |
| 1244 | KMP_CACHE_PREFETCH( |
| 1245 | &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1246 | #endif /* KMP_CACHE_MANAGE */ |
| 1247 | |
| 1248 | #if KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1249 | if (propagate_icvs) // push my fixed ICVs to my child |
| 1250 | copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1251 | #endif // KMP_BARRIER_ICV_PUSH |
| 1252 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1253 | KA_TRACE( |
| 1254 | 20, |
| 1255 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" |
| 1256 | "go(%p): %u => %u\n", |
| 1257 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 1258 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, |
| 1259 | child_bar->b_go + KMP_BARRIER_STATE_BUMP)); |
| 1260 | // Release child from barrier |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1261 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1262 | flag.release(); |
| 1263 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1264 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1265 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1266 | #if KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1267 | if (propagate_icvs && |
| 1268 | !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest |
| 1269 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, |
| 1270 | FALSE); |
| 1271 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 1272 | &thr_bar->th_fixed_icvs); |
| 1273 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1274 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1275 | KA_TRACE( |
| 1276 | 20, |
| 1277 | ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", |
| 1278 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1279 | } |
| 1280 | |
| 1281 | // Hierarchical Barrier |
| 1282 | |
| 1283 | // Initialize thread barrier data |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1284 | /* Initializes/re-initializes the hierarchical barrier data stored on a thread. |
| 1285 | Performs the minimum amount of initialization required based on how the team |
| 1286 | has changed. Returns true if leaf children will require both on-core and |
| 1287 | traditional wake-up mechanisms. For example, if the team size increases, |
| 1288 | threads already in the team will respond to on-core wakeup on their parent |
| 1289 | thread, but threads newly added to the team will only be listening on the |
| 1290 | their local b_go. */ |
| 1291 | static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt, |
| 1292 | kmp_bstate_t *thr_bar, |
| 1293 | kmp_uint32 nproc, int gtid, |
| 1294 | int tid, kmp_team_t *team) { |
| 1295 | // Checks to determine if (re-)initialization is needed |
| 1296 | bool uninitialized = thr_bar->team == NULL; |
| 1297 | bool team_changed = team != thr_bar->team; |
| 1298 | bool team_sz_changed = nproc != thr_bar->nproc; |
| 1299 | bool tid_changed = tid != thr_bar->old_tid; |
| 1300 | bool retval = false; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1301 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1302 | if (uninitialized || team_sz_changed) { |
| 1303 | __kmp_get_hierarchy(nproc, thr_bar); |
| 1304 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1305 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1306 | if (uninitialized || team_sz_changed || tid_changed) { |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1307 | thr_bar->my_level = thr_bar->depth - 1; // default for primary thread |
| 1308 | thr_bar->parent_tid = -1; // default for primary thread |
| 1309 | if (!KMP_MASTER_TID(tid)) { |
| 1310 | // if not primary thread, find parent thread in hierarchy |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1311 | kmp_uint32 d = 0; |
| 1312 | while (d < thr_bar->depth) { // find parent based on level of thread in |
| 1313 | // hierarchy, and note level |
| 1314 | kmp_uint32 rem; |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1315 | if (d == thr_bar->depth - 2) { // reached level right below the primary |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1316 | thr_bar->parent_tid = 0; |
| 1317 | thr_bar->my_level = d; |
| 1318 | break; |
Terry Wilmarth | 8f222cb | 2021-01-13 10:09:39 -0600 | [diff] [blame] | 1319 | } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) { |
| 1320 | // TODO: can we make the above op faster? |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1321 | // thread is not a subtree root at next level, so this is max |
| 1322 | thr_bar->parent_tid = tid - rem; |
| 1323 | thr_bar->my_level = d; |
| 1324 | break; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1325 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1326 | ++d; |
| 1327 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1328 | } |
Terry Wilmarth | 8f222cb | 2021-01-13 10:09:39 -0600 | [diff] [blame] | 1329 | __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) / |
| 1330 | (thr_bar->skip_per_level[thr_bar->my_level])), |
| 1331 | &(thr_bar->offset)); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1332 | thr_bar->old_tid = tid; |
| 1333 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; |
| 1334 | thr_bar->team = team; |
| 1335 | thr_bar->parent_bar = |
| 1336 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; |
| 1337 | } |
| 1338 | if (uninitialized || team_changed || tid_changed) { |
| 1339 | thr_bar->team = team; |
| 1340 | thr_bar->parent_bar = |
| 1341 | &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; |
| 1342 | retval = true; |
| 1343 | } |
| 1344 | if (uninitialized || team_sz_changed || tid_changed) { |
| 1345 | thr_bar->nproc = nproc; |
| 1346 | thr_bar->leaf_kids = thr_bar->base_leaf_kids; |
| 1347 | if (thr_bar->my_level == 0) |
| 1348 | thr_bar->leaf_kids = 0; |
| 1349 | if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc) |
Terry Wilmarth | c368678 | 2020-12-31 00:39:48 +0300 | [diff] [blame] | 1350 | __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids)); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1351 | thr_bar->leaf_state = 0; |
| 1352 | for (int i = 0; i < thr_bar->leaf_kids; ++i) |
| 1353 | ((char *)&(thr_bar->leaf_state))[7 - i] = 1; |
| 1354 | } |
| 1355 | return retval; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1356 | } |
| 1357 | |
/* Gather phase of the hierarchical barrier for one thread.

   bt           - barrier type; selects th_bar[bt] / t_bar[bt] state.
   this_thr     - descriptor of the calling thread.
   gtid         - global thread id of the caller (used for tracing and passed
                  to the per-thread barrier initialization).
   tid          - team-local thread id of the caller.
   reduce       - optional reduction callback; when non-NULL it is invoked as
                  reduce(my reduce_data, child reduce_data) for each child
                  after that child has arrived.
   itt_sync_obj - ITT sync object (present only in USE_ITT_BUILD builds).

   A non-leaf thread waits for all of its children, then a worker signals its
   parent, while the primary thread publishes the new arrived state on the
   team. */
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = 0;

  // Effective nesting level; the on-core barrier is only used at level 1.
  int level = team->t.t_level;
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  // Refresh this thread's hierarchy data; the return value is not needed here.
  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    kmp_int32 child_tid;
    // Value each child's b_arrived must reach for this barrier episode.
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // First, wait for leaf children to check-in on my b_arrived flag
        // The awaited value is the current arrived state with every leaf kid's
        // byte (leaf_state) set.
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          OMPT_REDUCTION_DECL(this_thr, gtid);
          OMPT_REDUCTION_BEGIN;
          // Leaf kids are the leaf_kids threads immediately following tid.
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          OMPT_REDUCTION_END;
        }
        // clear leaf_state bits
        KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
      }
      // Next, wait for higher level children on each child's b_arrived flag
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) { // gather lowest level threads first, but skip 0
        // Children at level d are spaced skip_per_level[d] apart and lie below
        // tid + skip_per_level[d + 1], clamped to the team size.
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          // NOTE(review): unlike the leaf-kid reduction above, this reduction
          // is not bracketed by OMPT_REDUCTION_BEGIN/END - confirm intended.
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
          }
        }
      }
    } else { // Blocktime is not infinite
      // Without the on-core path every child, including level 0, is waited on
      // through its own b_arrived flag.
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) { // Gather lowest level threads first
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
          }
        }
      }
    }
  }
  // All subordinates are gathered; now release parent if not primary thread

  if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
                  " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
                  gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
                  thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: After performing this write, a worker thread may
       not assume that the team is valid any more - it could be deallocated by
       the primary thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
      // flag; release it
      kmp_flag_64<> flag(&thr_bar->b_arrived,
                         other_threads[thr_bar->parent_tid]);
      flag.release();
    } else {
      // Leaf does special release on "offset" bits of parent's b_arrived flag
      // The leaf first records the new arrived state locally, then signals the
      // parent through byte (offset + 1) of the parent's flag.
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
                           thr_bar->offset + 1);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Primary thread needs to update the team's b_arrived value
    // NOTE(review): new_state is only computed when my_level != 0; if the
    // primary is a leaf it is still 0 here - confirm that case cannot occur.
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  // Is the team access below unsafe or just technically invalid?
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
| 1526 | |
// Release phase of the hierarchical barrier for barrier type `bt`.
//
// The primary thread (KMP_MASTER_TID(tid)) enters directly. Every other
// thread first waits to be released: either on its own b_go flag, or (on-core
// path: leaf thread, infinite blocktime, on-core barrier enabled, barrier data
// initialized) on its own byte of the parent's b_go flag. Once released, the
// thread resets the flag it waited on, re-initializes its hierarchical barrier
// data via __kmp_init_hierarchical_barrier_thread(), optionally copies ICVs
// (only when KMP_BARRIER_ICV_PUSH is enabled and propagate_icvs is nonzero)
// and, if it is not a leaf (my_level != 0), releases its own children.
//
// Parameters:
//   bt             - barrier type; selects the th_bar[bt] state of each thread
//   this_thr       - descriptor of the calling thread
//   gtid           - global thread id of the caller
//   tid            - thread id within the team (re-derived from gtid for
//                    workers after they have been released)
//   propagate_icvs - nonzero to copy ICVs as part of the release
//   itt_sync_obj   - only present with USE_ITT_BUILD; forwarded to the waits
//
// A worker returns early, without releasing anyone, when bt is
// bs_forkjoin_barrier and __kmp_global.g.g_done is set.
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1527 | static void __kmp_hierarchical_barrier_release( |
| 1528 | enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, |
| 1529 | int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { |
| 1530 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1531 | kmp_team_t *team; |
| 1532 | kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; |
| 1533 | kmp_uint32 nproc; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1534 | bool team_change = false; // indicates on-core barrier shouldn't be used |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1535 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1536 | if (KMP_MASTER_TID(tid)) { |
| 1537 | team = __kmp_threads[gtid]->th.th_team; |
| 1538 | KMP_DEBUG_ASSERT(team != NULL); |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1539 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary " |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1540 | "entered barrier type %d\n", |
| 1541 | gtid, team->t.t_id, tid, bt)); |
| 1542 | } else { // Worker threads |
| 1543 | // Wait for parent thread to release me |
| 1544 | if (!thr_bar->use_oncore_barrier || |
| 1545 | __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 || |
| 1546 | thr_bar->team == NULL) { |
| 1547 | // Use traditional method of waiting on my own b_go flag |
| 1548 | thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1549 | kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1550 | flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1551 | TCW_8(thr_bar->b_go, |
| 1552 | KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time |
| 1553 | } else { // Thread barrier data is initialized, this is a leaf, blocktime is |
| 1554 | // infinite, not nested |
| 1555 | // Wait on my "offset" bits on parent's b_go flag |
| 1556 | thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; |
| 1557 | kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, |
Terry Wilmarth | 8f222cb | 2021-01-13 10:09:39 -0600 | [diff] [blame] | 1558 | thr_bar->offset + 1, bt, |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1559 | this_thr USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1560 | flag.wait(this_thr, TRUE); |
  // After the wait, wait_flag tells which flag actually released this thread,
  // and therefore which one has to be reset.
| 1561 | if (thr_bar->wait_flag == |
| 1562 | KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go |
| 1563 | TCW_8(thr_bar->b_go, |
| 1564 | KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time |
| 1565 | } else { // Reset my bits on parent's b_go flag |
  // The parent's b_go is addressed as an array of bytes; only the byte at
  // index offset + 1 (the one this thread waited on) is cleared.
Andrey Churbanov | 2356d3d | 2017-07-03 11:24:08 +0000 | [diff] [blame] | 1566 | (RCAST(volatile char *, |
Terry Wilmarth | 8f222cb | 2021-01-13 10:09:39 -0600 | [diff] [blame] | 1567 | &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1568 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1569 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1570 | thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; |
| 1571 | // Early exit for reaping threads releasing forkjoin barrier |
| 1572 | if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) |
| 1573 | return; |
| 1574 | // The worker thread may now assume that the team is valid. |
| 1575 | team = __kmp_threads[gtid]->th.th_team; |
| 1576 | KMP_DEBUG_ASSERT(team != NULL); |
  // NOTE(review): tid is re-derived from gtid here, presumably because the
  // value passed in may be stale once the thread has been released - confirm.
| 1577 | tid = __kmp_tid_from_gtid(gtid); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1578 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1579 | KA_TRACE( |
| 1580 | 20, |
| 1581 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", |
| 1582 | gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); |
| 1583 | KMP_MB(); // Flush all pending memory write invalidates. |
| 1584 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1585 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1586 | nproc = this_thr->th.th_team_nproc; |
  // Compute the effective nesting level; inside a teams construct t_level is
  // adjusted upwards as described by the comments below.
| 1587 | int level = team->t.t_level; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1588 | if (team->t.t_threads[0] |
| 1589 | ->th.th_teams_microtask) { // are we inside the teams construct? |
| 1590 | if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && |
| 1591 | this_thr->th.th_teams_level == level) |
| 1592 | ++level; // level was not increased in teams construct for team_of_workers |
| 1593 | if (this_thr->th.th_teams_size.nteams > 1) |
| 1594 | ++level; // level was not increased in teams construct for team_of_masters |
| 1595 | } |
  // The on-core barrier is enabled only for an effective level of exactly 1.
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1596 | if (level == 1) |
| 1597 | thr_bar->use_oncore_barrier = 1; |
| 1598 | else |
| 1599 | thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1600 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1601 | // If the team size has increased, we still communicate with old leaves via |
| 1602 | // oncore barrier. |
  // Snapshot the leaf bookkeeping before it is re-initialized below, so the
  // leaves known before this call can still be released through leaf_state.
| 1603 | unsigned short int old_leaf_kids = thr_bar->leaf_kids; |
| 1604 | kmp_uint64 old_leaf_state = thr_bar->leaf_state; |
| 1605 | team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, |
| 1606 | tid, team); |
| 1607 | // But if the entire team changes, we won't use oncore barrier at all |
| 1608 | if (team_change) |
| 1609 | old_leaf_kids = 0; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1610 | |
| 1611 | #if KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1612 | if (propagate_icvs) { |
| 1613 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, |
| 1614 | FALSE); |
| 1615 | if (KMP_MASTER_TID( |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1616 | tid)) { // primary already has its ICVs in the final destination; copy them to its fixed ICVs |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1617 | copy_icvs(&thr_bar->th_fixed_icvs, |
| 1618 | &team->t.t_implicit_task_taskdata[tid].td_icvs); |
| 1619 | } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && |
| 1620 | thr_bar->use_oncore_barrier) { // optimization for inf blocktime |
| 1621 | if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) |
| 1622 | // leaves (on-core children) pull parent's fixed ICVs directly to local |
| 1623 | // ICV store |
| 1624 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 1625 | &thr_bar->parent_bar->th_fixed_icvs); |
| 1626 | // non-leaves will get ICVs piggybacked with b_go via NGO store |
| 1627 | } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs |
| 1628 | if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can |
| 1629 | // access |
| 1630 | copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); |
| 1631 | else // leaves copy parent's fixed ICVs directly to local ICV store |
| 1632 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 1633 | &thr_bar->parent_bar->th_fixed_icvs); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1634 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1635 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1636 | #endif // KMP_BARRIER_ICV_PUSH |
| 1637 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1638 | // Now, release my children |
| 1639 | if (thr_bar->my_level) { // not a leaf |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1640 | kmp_int32 child_tid; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1641 | kmp_uint32 last; |
| 1642 | if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && |
| 1643 | thr_bar->use_oncore_barrier) { |
| 1644 | if (KMP_MASTER_TID(tid)) { // do a flat release |
| 1645 | // Set local b_go to bump children via NGO store of the cache line |
| 1646 | // containing ICVs and b_go. |
| 1647 | thr_bar->b_go = KMP_BARRIER_STATE_BUMP; |
| 1648 | // Use ngo stores if available; b_go piggybacks in the last 8 bytes of |
| 1649 | // the cache line |
| 1650 | ngo_load(&thr_bar->th_fixed_icvs); |
| 1651 | // This loops over all the threads skipping only the leaf nodes in the |
| 1652 | // hierarchy |
| 1653 | for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc; |
| 1654 | child_tid += thr_bar->skip_per_level[1]) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1655 | kmp_bstate_t *child_bar = |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1656 | &team->t.t_threads[child_tid]->th.th_bar[bt].bb; |
| 1657 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " |
| 1658 | "releasing T#%d(%d:%d)" |
| 1659 | " go(%p): %u => %u\n", |
| 1660 | gtid, team->t.t_id, tid, |
| 1661 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id, |
| 1662 | child_tid, &child_bar->b_go, child_bar->b_go, |
| 1663 | child_bar->b_go + KMP_BARRIER_STATE_BUMP)); |
| 1664 | // Use ngo store (if available) to both store ICVs and release child |
| 1665 | // via child's b_go |
| 1666 | ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1667 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1668 | ngo_sync(); |
| 1669 | } |
| 1670 | TCW_8(thr_bar->b_go, |
| 1671 | KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time |
| 1672 | // Now, release leaf children |
| 1673 | if (thr_bar->leaf_kids) { // if there are any |
| 1674 | // We test team_change on the off-chance that the level 1 team changed. |
| 1675 | if (team_change || |
| 1676 | old_leaf_kids < thr_bar->leaf_kids) { // some old, some new |
| 1677 | if (old_leaf_kids) { // release old leaf kids |
| 1678 | thr_bar->b_go |= old_leaf_state; |
| 1679 | } |
| 1680 | // Release new leaf kids |
  // New leaves are the tids after the old ones, up to (but excluding) the
  // next non-leaf at tid + skip_per_level[1], clamped to the team size.
| 1681 | last = tid + thr_bar->skip_per_level[1]; |
| 1682 | if (last > nproc) |
| 1683 | last = nproc; |
| 1684 | for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last; |
| 1685 | ++child_tid) { // skip_per_level[0]=1 |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1686 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; |
| 1687 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1688 | KA_TRACE( |
| 1689 | 20, |
| 1690 | ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" |
| 1691 | " T#%d(%d:%d) go(%p): %u => %u\n", |
| 1692 | gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), |
| 1693 | team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, |
| 1694 | child_bar->b_go + KMP_BARRIER_STATE_BUMP)); |
| 1695 | // Release child using child's b_go flag |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1696 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1697 | flag.release(); |
| 1698 | } |
| 1699 | } else { // Release all children at once with leaf_state bits on my own |
| 1700 | // b_go flag |
| 1701 | thr_bar->b_go |= thr_bar->leaf_state; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1702 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1703 | } |
| 1704 | } else { // Blocktime is not infinite; do a simple hierarchical release |
| 1705 | for (int d = thr_bar->my_level - 1; d >= 0; |
| 1706 | --d) { // Release highest level threads first |
  // Children at depth d are spaced skip_per_level[d] apart and lie below
  // tid + skip_per_level[d + 1], clamped to the team size.
| 1707 | last = tid + thr_bar->skip_per_level[d + 1]; |
| 1708 | kmp_uint32 skip = thr_bar->skip_per_level[d]; |
| 1709 | if (last > nproc) |
| 1710 | last = nproc; |
| 1711 | for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1712 | kmp_info_t *child_thr = team->t.t_threads[child_tid]; |
| 1713 | kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1714 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " |
| 1715 | "releasing T#%d(%d:%d) go(%p): %u => %u\n", |
| 1716 | gtid, team->t.t_id, tid, |
| 1717 | __kmp_gtid_from_tid(child_tid, team), team->t.t_id, |
| 1718 | child_tid, &child_bar->b_go, child_bar->b_go, |
| 1719 | child_bar->b_go + KMP_BARRIER_STATE_BUMP)); |
| 1720 | // Release child using child's b_go flag |
Terry Wilmarth | 7139913 | 2020-12-01 14:03:40 -0600 | [diff] [blame] | 1721 | kmp_flag_64<> flag(&child_bar->b_go, child_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1722 | flag.release(); |
| 1723 | } |
| 1724 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1725 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1726 | #if KMP_BARRIER_ICV_PUSH |
| 1727 | if (propagate_icvs && !KMP_MASTER_TID(tid)) |
| 1728 | // non-leaves copy ICVs from fixed ICVs to local dest |
| 1729 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 1730 | &thr_bar->th_fixed_icvs); |
| 1731 | #endif // KMP_BARRIER_ICV_PUSH |
| 1732 | } |
| 1733 | KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " |
| 1734 | "barrier type %d\n", |
| 1735 | gtid, team->t.t_id, tid, bt)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1736 | } |
| 1737 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1738 | // End of Barrier Algorithms |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1739 | |
// Flag type recording whether a barrier was cancelled. The representation is
// chosen at compile time by the `cancellable` template argument:
//  - is_cancellable<true> holds an ordinary runtime bool, initially false;
//  - is_cancellable<false> holds nothing, ignores assignment, and always
//    converts to false as a compile-time constant.
template <bool cancellable> struct is_cancellable {};

template <> struct is_cancellable<true> {
  bool value;
  // Default construction means "not cancelled".
  is_cancellable() : is_cancellable(false) {}
  is_cancellable(bool initial) : value(initial) {}
  // Assigning a bool overwrites the stored flag.
  is_cancellable &operator=(bool updated) {
    this->value = updated;
    return *this;
  }
  operator bool() const { return this->value; }
};

template <> struct is_cancellable<false> {
  // Nothing is stored, so assignment is accepted but has no effect.
  is_cancellable &operator=(bool) { return *this; }
  constexpr operator bool() const { return false; }
};
| 1758 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1759 | // Internal function to do a barrier. |
| 1760 | /* If is_split is true, do a split barrier, otherwise, do a plain barrier |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1761 | If reduce is non-NULL, do a split reduction barrier, otherwise, do a split |
| 1762 | barrier |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1763 | When cancellable = false, |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1764 | Returns 0 if primary thread, 1 if worker thread. |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1765 | When cancellable = true |
| 1766 | Returns 0 if not cancelled, 1 if cancelled. */ |
| 1767 | template <bool cancellable = false> |
| 1768 | static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, |
| 1769 | size_t reduce_size, void *reduce_data, |
| 1770 | void (*reduce)(void *, void *)) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1771 | KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); |
| 1772 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 1773 | int tid = __kmp_tid_from_gtid(gtid); |
| 1774 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
| 1775 | kmp_team_t *team = this_thr->th.th_team; |
| 1776 | int status = 0; |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1777 | is_cancellable<cancellable> cancelled; |
Joachim Protze | 62a68bd | 2018-09-10 14:34:54 +0000 | [diff] [blame] | 1778 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 1779 | ompt_data_t *my_task_data; |
| 1780 | ompt_data_t *my_parallel_data; |
| 1781 | void *return_address; |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 1782 | ompt_sync_region_t barrier_kind; |
Andrey Churbanov | 82ccfaa | 2015-04-29 16:42:24 +0000 | [diff] [blame] | 1783 | #endif |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1784 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1785 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, |
| 1786 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1787 | |
Jonathan Peyton | 9e4474d | 2015-06-29 17:28:57 +0000 | [diff] [blame] | 1788 | #if OMPT_SUPPORT |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 1789 | if (ompt_enabled.enabled) { |
| 1790 | #if OMPT_OPTIONAL |
| 1791 | my_task_data = OMPT_CUR_TASK_DATA(this_thr); |
| 1792 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); |
| 1793 | return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 1794 | barrier_kind = __ompt_get_barrier_kind(bt, this_thr); |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 1795 | if (ompt_enabled.ompt_callback_sync_region) { |
| 1796 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 1797 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, |
| 1798 | return_address); |
Andrey Churbanov | 82ccfaa | 2015-04-29 16:42:24 +0000 | [diff] [blame] | 1799 | } |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 1800 | if (ompt_enabled.ompt_callback_sync_region_wait) { |
| 1801 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 1802 | barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data, |
| 1803 | return_address); |
Andrey Churbanov | 82ccfaa | 2015-04-29 16:42:24 +0000 | [diff] [blame] | 1804 | } |
Jonathan Peyton | 9e4474d | 2015-06-29 17:28:57 +0000 | [diff] [blame] | 1805 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1806 | // It is OK to report the barrier state after the barrier begin callback. |
| 1807 | // According to the OMPT specification, a compliant implementation may |
| 1808 | // even delay reporting this state until the barrier begins to wait. |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 1809 | auto *ompt_thr_info = &this_thr->th.ompt_thread_info; |
| 1810 | switch (barrier_kind) { |
| 1811 | case ompt_sync_region_barrier_explicit: |
| 1812 | ompt_thr_info->state = ompt_state_wait_barrier_explicit; |
| 1813 | break; |
| 1814 | case ompt_sync_region_barrier_implicit_workshare: |
| 1815 | ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare; |
| 1816 | break; |
| 1817 | case ompt_sync_region_barrier_implicit_parallel: |
| 1818 | ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel; |
| 1819 | break; |
| 1820 | case ompt_sync_region_barrier_teams: |
| 1821 | ompt_thr_info->state = ompt_state_wait_barrier_teams; |
| 1822 | break; |
| 1823 | case ompt_sync_region_barrier_implementation: |
| 1824 | [[fallthrough]]; |
| 1825 | default: |
| 1826 | ompt_thr_info->state = ompt_state_wait_barrier_implementation; |
| 1827 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1828 | } |
Andrey Churbanov | 82ccfaa | 2015-04-29 16:42:24 +0000 | [diff] [blame] | 1829 | #endif |
| 1830 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1831 | if (!team->t.t_serialized) { |
| 1832 | #if USE_ITT_BUILD |
| 1833 | // This value will be used in itt notify events below. |
| 1834 | void *itt_sync_obj = NULL; |
| 1835 | #if USE_ITT_NOTIFY |
| 1836 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 1837 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); |
| 1838 | #endif |
| 1839 | #endif /* USE_ITT_BUILD */ |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1840 | if (__kmp_tasking_mode == tskm_extra_barrier) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1841 | __kmp_tasking_barrier(team, this_thr, gtid); |
| 1842 | KA_TRACE(15, |
| 1843 | ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, |
| 1844 | __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1845 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1846 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1847 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can |
| 1848 | access it when the team struct is not guaranteed to exist. */ |
| 1849 | // See note about the corresponding code in __kmp_join_barrier() being |
| 1850 | // performance-critical. |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1851 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
Jonathan Peyton | 786babf | 2016-10-07 18:12:19 +0000 | [diff] [blame] | 1852 | #if KMP_USE_MONITOR |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1853 | this_thr->th.th_team_bt_intervals = |
| 1854 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; |
| 1855 | this_thr->th.th_team_bt_set = |
| 1856 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; |
Jonathan Peyton | d5c7058 | 2017-01-27 17:54:31 +0000 | [diff] [blame] | 1857 | #else |
Jonathan Peyton | 52430c6 | 2017-09-05 15:45:48 +0000 | [diff] [blame] | 1858 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); |
Jonathan Peyton | d5c7058 | 2017-01-27 17:54:31 +0000 | [diff] [blame] | 1859 | #endif |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1860 | } |
| 1861 | |
| 1862 | #if USE_ITT_BUILD |
| 1863 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1864 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1865 | #endif /* USE_ITT_BUILD */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1866 | #if USE_DEBUGGER |
| 1867 | // Let the debugger know: the thread arrived to the barrier and waiting. |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1868 | if (KMP_MASTER_TID(tid)) { // Primary thread counter stored in team struct |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1869 | team->t.t_bar[bt].b_master_arrived += 1; |
| 1870 | } else { |
| 1871 | this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; |
| 1872 | } // if |
| 1873 | #endif /* USE_DEBUGGER */ |
| 1874 | if (reduce != NULL) { |
| 1875 | // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 |
| 1876 | this_thr->th.th_local.reduce_data = reduce_data; |
| 1877 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1878 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1879 | if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) |
Jonathan Peyton | a64c7e6 | 2024-05-07 08:41:51 -0500 | [diff] [blame] | 1880 | __kmp_task_team_setup(this_thr, team); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1881 | |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1882 | if (cancellable) { |
| 1883 | cancelled = __kmp_linear_barrier_gather_cancellable( |
| 1884 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1885 | } else { |
| 1886 | switch (__kmp_barrier_gather_pattern[bt]) { |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 1887 | case bp_dist_bar: { |
| 1888 | __kmp_dist_barrier_gather(bt, this_thr, gtid, tid, |
| 1889 | reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1890 | break; |
| 1891 | } |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1892 | case bp_hyper_bar: { |
| 1893 | // don't set branch bits to 0; use linear |
| 1894 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); |
| 1895 | __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, |
| 1896 | reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1897 | break; |
| 1898 | } |
| 1899 | case bp_hierarchical_bar: { |
| 1900 | __kmp_hierarchical_barrier_gather( |
| 1901 | bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1902 | break; |
| 1903 | } |
| 1904 | case bp_tree_bar: { |
| 1905 | // don't set branch bits to 0; use linear |
| 1906 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); |
| 1907 | __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1908 | reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1909 | break; |
| 1910 | } |
| 1911 | default: { |
| 1912 | __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, |
| 1913 | reduce USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 1914 | } |
| 1915 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1916 | } |
| 1917 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1918 | KMP_MB(); |
| 1919 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1920 | if (KMP_MASTER_TID(tid)) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1921 | status = 0; |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1922 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { |
Andrey Churbanov | 2356d3d | 2017-07-03 11:24:08 +0000 | [diff] [blame] | 1923 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1924 | } |
| 1925 | #if USE_DEBUGGER |
| 1926 | // Let the debugger know: All threads are arrived and starting leaving the |
| 1927 | // barrier. |
| 1928 | team->t.t_bar[bt].b_team_arrived += 1; |
| 1929 | #endif |
| 1930 | |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1931 | if (__kmp_omp_cancellation) { |
| 1932 | kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request); |
| 1933 | // Reset cancellation flag for worksharing constructs |
| 1934 | if (cancel_request == cancel_loop || |
| 1935 | cancel_request == cancel_sections) { |
| 1936 | KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq); |
| 1937 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1938 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 1939 | #if USE_ITT_BUILD |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1940 | /* TODO: In case of split reduction barrier, primary thread may send |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1941 | acquired event early, before the final summation into the shared |
| 1942 | variable is done (final summation can be a long operation for array |
| 1943 | reductions). */ |
| 1944 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 1945 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); |
| 1946 | #endif /* USE_ITT_BUILD */ |
| 1947 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 1948 | // Barrier - report frame end (only if active_level == 1) |
| 1949 | if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && |
| 1950 | __kmp_forkjoin_frames_mode && |
AndreyChurbanov | 967ebf1 | 2020-07-17 21:10:25 +0300 | [diff] [blame] | 1951 | (this_thr->th.th_teams_microtask == NULL || // either not in teams |
| 1952 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1953 | team->t.t_active_level == 1) { |
Jonathan Peyton | e9a2302 | 2018-08-09 22:04:30 +0000 | [diff] [blame] | 1954 | ident_t *loc = __kmp_threads[gtid]->th.th_ident; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1955 | kmp_uint64 cur_time = __itt_get_timestamp(); |
| 1956 | kmp_info_t **other_threads = team->t.t_threads; |
| 1957 | int nproc = this_thr->th.th_team_nproc; |
| 1958 | int i; |
| 1959 | switch (__kmp_forkjoin_frames_mode) { |
| 1960 | case 1: |
| 1961 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, |
| 1962 | loc, nproc); |
| 1963 | this_thr->th.th_frame_time = cur_time; |
| 1964 | break; |
| 1965 | case 2: // AC 2015-01-19: currently does not work for hierarchical (to |
| 1966 | // be fixed) |
| 1967 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, |
| 1968 | 1, loc, nproc); |
| 1969 | break; |
| 1970 | case 3: |
| 1971 | if (__itt_metadata_add_ptr) { |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 1972 | // Initialize with primary thread's wait time |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 1973 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; |
| 1974 | // Set arrive time to zero to be able to check it in |
| 1975 | // __kmp_invoke_task(); the same is done inside the loop below |
| 1976 | this_thr->th.th_bar_arrive_time = 0; |
| 1977 | for (i = 1; i < nproc; ++i) { |
| 1978 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); |
| 1979 | other_threads[i]->th.th_bar_arrive_time = 0; |
| 1980 | } |
| 1981 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, |
| 1982 | cur_time, delta, |
| 1983 | (kmp_uint64)(reduce != NULL)); |
| 1984 | } |
| 1985 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, |
| 1986 | loc, nproc); |
| 1987 | this_thr->th.th_frame_time = cur_time; |
| 1988 | break; |
| 1989 | } |
| 1990 | } |
| 1991 | #endif /* USE_ITT_BUILD */ |
| 1992 | } else { |
| 1993 | status = 1; |
| 1994 | #if USE_ITT_BUILD |
| 1995 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 1996 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); |
| 1997 | #endif /* USE_ITT_BUILD */ |
| 1998 | } |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 1999 | if ((status == 1 || !is_split) && !cancelled) { |
| 2000 | if (cancellable) { |
| 2001 | cancelled = __kmp_linear_barrier_release_cancellable( |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2002 | bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2003 | } else { |
| 2004 | switch (__kmp_barrier_release_pattern[bt]) { |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2005 | case bp_dist_bar: { |
| 2006 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); |
| 2007 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, |
| 2008 | FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2009 | break; |
| 2010 | } |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2011 | case bp_hyper_bar: { |
| 2012 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); |
| 2013 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, |
| 2014 | FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2015 | break; |
| 2016 | } |
| 2017 | case bp_hierarchical_bar: { |
| 2018 | __kmp_hierarchical_barrier_release( |
| 2019 | bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2020 | break; |
| 2021 | } |
| 2022 | case bp_tree_bar: { |
| 2023 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); |
| 2024 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2025 | FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2026 | break; |
| 2027 | } |
| 2028 | default: { |
| 2029 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, |
| 2030 | FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2031 | } |
| 2032 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2033 | } |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2034 | if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2035 | __kmp_task_team_sync(this_thr, team); |
| 2036 | } |
| 2037 | } |
| 2038 | |
| 2039 | #if USE_ITT_BUILD |
| 2040 | /* GEH: TODO: Move this under if-condition above and also include in |
| 2041 | __kmp_end_split_barrier(). This will more accurately represent the actual |
| 2042 | release time of the threads for split barriers. */ |
| 2043 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 2044 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); |
| 2045 | #endif /* USE_ITT_BUILD */ |
| 2046 | } else { // Team is serialized. |
| 2047 | status = 0; |
| 2048 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2049 | if (this_thr->th.th_task_team != NULL) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2050 | #if USE_ITT_NOTIFY |
Jonathan Peyton | c90278f | 2018-08-24 18:03:27 +0000 | [diff] [blame] | 2051 | void *itt_sync_obj = NULL; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2052 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { |
| 2053 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); |
| 2054 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); |
| 2055 | } |
| 2056 | #endif |
| 2057 | |
Shilei Tian | e34c7a3 | 2021-12-29 23:22:37 -0500 | [diff] [blame] | 2058 | KMP_DEBUG_ASSERT( |
| 2059 | this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE || |
| 2060 | this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == |
| 2061 | TRUE); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2062 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); |
Jonathan Peyton | a64c7e6 | 2024-05-07 08:41:51 -0500 | [diff] [blame] | 2063 | __kmp_task_team_setup(this_thr, team); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2064 | |
| 2065 | #if USE_ITT_BUILD |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2066 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2067 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); |
| 2068 | #endif /* USE_ITT_BUILD */ |
| 2069 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2070 | } |
| 2071 | } |
| 2072 | KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", |
| 2073 | gtid, __kmp_team_from_gtid(gtid)->t.t_id, |
| 2074 | __kmp_tid_from_gtid(gtid), status)); |
| 2075 | |
| 2076 | #if OMPT_SUPPORT |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2077 | if (ompt_enabled.enabled) { |
| 2078 | #if OMPT_OPTIONAL |
| 2079 | if (ompt_enabled.ompt_callback_sync_region_wait) { |
| 2080 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 2081 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, |
| 2082 | return_address); |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2083 | } |
| 2084 | if (ompt_enabled.ompt_callback_sync_region) { |
| 2085 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
Jonathan Peyton | 6f8ffce | 2019-02-28 20:55:39 +0000 | [diff] [blame] | 2086 | barrier_kind, ompt_scope_end, my_parallel_data, my_task_data, |
| 2087 | return_address); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2088 | } |
| 2089 | #endif |
Joachim Protze | 96ae997 | 2018-12-18 08:52:30 +0000 | [diff] [blame] | 2090 | this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2091 | } |
| 2092 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2093 | |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2094 | if (cancellable) |
| 2095 | return (int)cancelled; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2096 | return status; |
| 2097 | } |
| 2098 | |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2099 | // Returns 0 if primary thread, 1 if worker thread. |
Jonathan Peyton | 2d9c27e | 2019-02-19 18:47:57 +0000 | [diff] [blame] | 2100 | int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, |
| 2101 | size_t reduce_size, void *reduce_data, |
| 2102 | void (*reduce)(void *, void *)) { |
| 2103 | return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data, |
| 2104 | reduce); |
| 2105 | } |
| 2106 | |
#if defined(KMP_GOMP_COMPAT)
// Plain (non-split, no reduction) barrier that reports cancellation.
// When __kmp_omp_cancellation is off this is just an ordinary plain barrier.
// Returns 1 if cancelled, 0 otherwise
int __kmp_barrier_gomp_cancel(int gtid) {
  // Cancellation disabled: run the regular barrier and report "not cancelled".
  if (!__kmp_omp_cancellation) {
    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
    return FALSE;
  }

  int was_cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid,
                                                   FALSE, 0, NULL, NULL);
  if (was_cancelled) {
    int my_tid = __kmp_tid_from_gtid(gtid);
    kmp_info_t *self = __kmp_threads[gtid];
    // Primary thread does not need to revert anything; only workers need to
    // revert their private b_arrived flag.
    if (!KMP_MASTER_TID(my_tid)) {
      self->th.th_bar[bs_plain_barrier].bb.b_arrived -= KMP_BARRIER_STATE_BUMP;
    }
  }
  return was_cancelled;
}
#endif
| 2130 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2131 | void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { |
| 2132 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); |
| 2133 | KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); |
Hansang Bae | a3918ee | 2021-03-11 17:34:06 -0600 | [diff] [blame] | 2134 | KMP_DEBUG_ASSERT(bt < bs_last_barrier); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2135 | int tid = __kmp_tid_from_gtid(gtid); |
| 2136 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
| 2137 | kmp_team_t *team = this_thr->th.th_team; |
| 2138 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2139 | if (!team->t.t_serialized) { |
| 2140 | if (KMP_MASTER_GTID(gtid)) { |
| 2141 | switch (__kmp_barrier_release_pattern[bt]) { |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2142 | case bp_dist_bar: { |
| 2143 | __kmp_dist_barrier_release(bt, this_thr, gtid, tid, |
| 2144 | FALSE USE_ITT_BUILD_ARG(NULL)); |
| 2145 | break; |
| 2146 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2147 | case bp_hyper_bar: { |
| 2148 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); |
| 2149 | __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, |
| 2150 | FALSE USE_ITT_BUILD_ARG(NULL)); |
| 2151 | break; |
| 2152 | } |
| 2153 | case bp_hierarchical_bar: { |
| 2154 | __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, |
| 2155 | FALSE USE_ITT_BUILD_ARG(NULL)); |
| 2156 | break; |
| 2157 | } |
| 2158 | case bp_tree_bar: { |
| 2159 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); |
| 2160 | __kmp_tree_barrier_release(bt, this_thr, gtid, tid, |
| 2161 | FALSE USE_ITT_BUILD_ARG(NULL)); |
| 2162 | break; |
| 2163 | } |
| 2164 | default: { |
| 2165 | __kmp_linear_barrier_release(bt, this_thr, gtid, tid, |
| 2166 | FALSE USE_ITT_BUILD_ARG(NULL)); |
| 2167 | } |
| 2168 | } |
| 2169 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 2170 | __kmp_task_team_sync(this_thr, team); |
| 2171 | } // if |
| 2172 | } |
| 2173 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2174 | } |
| 2175 | |
| 2176 | void __kmp_join_barrier(int gtid) { |
| 2177 | KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); |
| 2178 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); |
Hansang Bae | a3918ee | 2021-03-11 17:34:06 -0600 | [diff] [blame] | 2179 | |
| 2180 | KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); |
| 2181 | |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 2182 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
| 2183 | kmp_team_t *team; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2184 | int tid; |
| 2185 | #ifdef KMP_DEBUG |
| 2186 | int team_id; |
| 2187 | #endif /* KMP_DEBUG */ |
| 2188 | #if USE_ITT_BUILD |
| 2189 | void *itt_sync_obj = NULL; |
| 2190 | #if USE_ITT_NOTIFY |
| 2191 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need |
| 2192 | // Get object created at fork_barrier |
| 2193 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 2194 | #endif |
| 2195 | #endif /* USE_ITT_BUILD */ |
Jonathan Peyton | edc7f6d | 2022-01-31 10:04:49 -0600 | [diff] [blame] | 2196 | #if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG) |
| 2197 | int nproc = this_thr->th.th_team_nproc; |
| 2198 | #endif |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2199 | KMP_MB(); |
| 2200 | |
| 2201 | // Get current info |
| 2202 | team = this_thr->th.th_team; |
Jonathan Peyton | edc7f6d | 2022-01-31 10:04:49 -0600 | [diff] [blame] | 2203 | KMP_DEBUG_ASSERT(nproc == team->t.t_nproc); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2204 | tid = __kmp_tid_from_gtid(gtid); |
| 2205 | #ifdef KMP_DEBUG |
| 2206 | team_id = team->t.t_id; |
AndreyChurbanov | bb76e13 | 2021-07-30 17:04:42 +0300 | [diff] [blame] | 2207 | kmp_info_t *master_thread = this_thr->th.th_team_master; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2208 | if (master_thread != team->t.t_threads[0]) { |
| 2209 | __kmp_print_structure(); |
| 2210 | } |
| 2211 | #endif /* KMP_DEBUG */ |
| 2212 | KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); |
| 2213 | KMP_MB(); |
| 2214 | |
| 2215 | // Verify state |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2216 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); |
| 2217 | KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); |
| 2218 | KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); |
| 2219 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", |
| 2220 | gtid, team_id, tid)); |
| 2221 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2222 | #if OMPT_SUPPORT |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2223 | if (ompt_enabled.enabled) { |
| 2224 | #if OMPT_OPTIONAL |
Joachim Protze | 62a68bd | 2018-09-10 14:34:54 +0000 | [diff] [blame] | 2225 | ompt_data_t *my_task_data; |
| 2226 | ompt_data_t *my_parallel_data; |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2227 | void *codeptr = NULL; |
| 2228 | int ds_tid = this_thr->th.th_info.ds.ds_tid; |
| 2229 | if (KMP_MASTER_TID(ds_tid) && |
| 2230 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || |
| 2231 | ompt_callbacks.ompt_callback(ompt_callback_sync_region))) |
| 2232 | codeptr = team->t.ompt_team_info.master_return_address; |
| 2233 | my_task_data = OMPT_CUR_TASK_DATA(this_thr); |
| 2234 | my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2235 | ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel; |
| 2236 | ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel; |
| 2237 | if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) { |
| 2238 | sync_kind = ompt_sync_region_barrier_teams; |
| 2239 | ompt_state = ompt_state_wait_barrier_teams; |
| 2240 | } |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2241 | if (ompt_enabled.ompt_callback_sync_region) { |
| 2242 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2243 | sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr); |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2244 | } |
| 2245 | if (ompt_enabled.ompt_callback_sync_region_wait) { |
| 2246 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2247 | sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr); |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2248 | } |
Joachim Protze | 201c6ca | 2018-01-10 12:51:27 +0000 | [diff] [blame] | 2249 | if (!KMP_MASTER_TID(ds_tid)) |
| 2250 | this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2251 | #endif |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2252 | this_thr->th.ompt_thread_info.state = ompt_state; |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2253 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2254 | #endif |
| 2255 | |
| 2256 | if (__kmp_tasking_mode == tskm_extra_barrier) { |
| 2257 | __kmp_tasking_barrier(team, this_thr, gtid); |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2258 | KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", |
| 2259 | gtid, team_id, tid)); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2260 | } |
| 2261 | #ifdef KMP_DEBUG |
| 2262 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 2263 | KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " |
| 2264 | "%p, th_task_team = %p\n", |
| 2265 | __kmp_gtid_from_thread(this_thr), team_id, |
| 2266 | team->t.t_task_team[this_thr->th.th_task_state], |
| 2267 | this_thr->th.th_task_team)); |
Jonathan Peyton | a64c7e6 | 2024-05-07 08:41:51 -0500 | [diff] [blame] | 2268 | KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, this_thr); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2269 | } |
| 2270 | #endif /* KMP_DEBUG */ |
| 2271 | |
| 2272 | /* Copy the blocktime info to the thread, where __kmp_wait_template() can |
| 2273 | access it when the team struct is not guaranteed to exist. Doing these |
| 2274 | loads causes a cache miss slows down EPCC parallel by 2x. As a workaround, |
| 2275 | we do not perform the copy if blocktime=infinite, since the values are not |
| 2276 | used by __kmp_wait_template() in that case. */ |
| 2277 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
| 2278 | #if KMP_USE_MONITOR |
| 2279 | this_thr->th.th_team_bt_intervals = |
| 2280 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; |
| 2281 | this_thr->th.th_team_bt_set = |
| 2282 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; |
| 2283 | #else |
Jonathan Peyton | 52430c6 | 2017-09-05 15:45:48 +0000 | [diff] [blame] | 2284 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2285 | #endif |
| 2286 | } |
| 2287 | |
| 2288 | #if USE_ITT_BUILD |
| 2289 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 2290 | __kmp_itt_barrier_starting(gtid, itt_sync_obj); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2291 | #endif /* USE_ITT_BUILD */ |
| 2292 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2293 | switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2294 | case bp_dist_bar: { |
| 2295 | __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2296 | NULL USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2297 | break; |
| 2298 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2299 | case bp_hyper_bar: { |
| 2300 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); |
| 2301 | __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2302 | NULL USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2303 | break; |
| 2304 | } |
| 2305 | case bp_hierarchical_bar: { |
| 2306 | __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2307 | NULL USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2308 | break; |
| 2309 | } |
| 2310 | case bp_tree_bar: { |
| 2311 | KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); |
| 2312 | __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2313 | NULL USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2314 | break; |
| 2315 | } |
| 2316 | default: { |
| 2317 | __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2318 | NULL USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2319 | } |
| 2320 | } |
| 2321 | |
| 2322 | /* From this point on, the team data structure may be deallocated at any time |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2323 | by the primary thread - it is unsafe to reference it in any of the worker |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2324 | threads. Any per-team data items that need to be referenced before the |
| 2325 | end of the barrier should be moved to the kmp_task_team_t structs. */ |
| 2326 | if (KMP_MASTER_TID(tid)) { |
| 2327 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 2328 | __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2329 | } |
Jonathan Peyton | 211f764 | 2018-12-13 23:14:24 +0000 | [diff] [blame] | 2330 | if (__kmp_display_affinity) { |
| 2331 | KMP_CHECK_UPDATE(team->t.t_display_affinity, 0); |
| 2332 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2333 | #if KMP_STATS_ENABLED |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2334 | // Have primary thread flag the workers to indicate they are now waiting for |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2335 | // next parallel region, Also wake them up so they switch their timers to |
| 2336 | // idle. |
| 2337 | for (int i = 0; i < team->t.t_nproc; ++i) { |
| 2338 | kmp_info_t *team_thread = team->t.t_threads[i]; |
| 2339 | if (team_thread == this_thr) |
| 2340 | continue; |
| 2341 | team_thread->th.th_stats->setIdleFlag(); |
| 2342 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && |
| 2343 | team_thread->th.th_sleep_loc != NULL) |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2344 | __kmp_null_resume_wrapper(team_thread); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2345 | } |
Andrey Churbanov | fe3c30a | 2015-05-06 19:22:36 +0000 | [diff] [blame] | 2346 | #endif |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2347 | #if USE_ITT_BUILD |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2348 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 2349 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); |
| 2350 | #endif /* USE_ITT_BUILD */ |
| 2351 | |
| 2352 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 2353 | // Join barrier - report frame end |
| 2354 | if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && |
AndreyChurbanov | 967ebf1 | 2020-07-17 21:10:25 +0300 | [diff] [blame] | 2355 | __kmp_forkjoin_frames_mode && |
| 2356 | (this_thr->th.th_teams_microtask == NULL || // either not in teams |
| 2357 | this_thr->th.th_teams_size.nteams == 1) && // or inside single team |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2358 | team->t.t_active_level == 1) { |
| 2359 | kmp_uint64 cur_time = __itt_get_timestamp(); |
| 2360 | ident_t *loc = team->t.t_ident; |
| 2361 | kmp_info_t **other_threads = team->t.t_threads; |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2362 | switch (__kmp_forkjoin_frames_mode) { |
| 2363 | case 1: |
| 2364 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, |
| 2365 | loc, nproc); |
| 2366 | break; |
| 2367 | case 2: |
| 2368 | __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, |
| 2369 | loc, nproc); |
| 2370 | break; |
| 2371 | case 3: |
| 2372 | if (__itt_metadata_add_ptr) { |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2373 | // Initialize with primary thread's wait time |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2374 | kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; |
| 2375 | // Set arrive time to zero to be able to check it in |
| 2376 | // __kmp_invoke_task(); the same is done inside the loop below |
| 2377 | this_thr->th.th_bar_arrive_time = 0; |
Jonathan Peyton | edc7f6d | 2022-01-31 10:04:49 -0600 | [diff] [blame] | 2378 | for (int i = 1; i < nproc; ++i) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2379 | delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); |
| 2380 | other_threads[i]->th.th_bar_arrive_time = 0; |
| 2381 | } |
| 2382 | __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, |
| 2383 | cur_time, delta, 0); |
| 2384 | } |
| 2385 | __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, |
| 2386 | loc, nproc); |
| 2387 | this_thr->th.th_frame_time = cur_time; |
| 2388 | break; |
| 2389 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2390 | } |
| 2391 | #endif /* USE_ITT_BUILD */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2392 | } |
| 2393 | #if USE_ITT_BUILD |
| 2394 | else { |
| 2395 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) |
| 2396 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); |
| 2397 | } |
| 2398 | #endif /* USE_ITT_BUILD */ |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2399 | |
| 2400 | #if KMP_DEBUG |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2401 | if (KMP_MASTER_TID(tid)) { |
| 2402 | KA_TRACE( |
| 2403 | 15, |
| 2404 | ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", |
| 2405 | gtid, team_id, tid, nproc)); |
| 2406 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2407 | #endif /* KMP_DEBUG */ |
| 2408 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2409 | // TODO now, mark worker threads as done so they may be disbanded |
| 2410 | KMP_MB(); // Flush all pending memory write invalidates. |
| 2411 | KA_TRACE(10, |
| 2412 | ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); |
Andrey Churbanov | 82ccfaa | 2015-04-29 16:42:24 +0000 | [diff] [blame] | 2413 | |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2414 | } |
| 2415 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2416 | // TODO release worker threads' fork barriers as we are ready instead of all at |
| 2417 | // once |
| 2418 | void __kmp_fork_barrier(int gtid, int tid) { |
| 2419 | KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); |
| 2420 | KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); |
| 2421 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
| 2422 | kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2423 | #if USE_ITT_BUILD |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2424 | void *itt_sync_obj = NULL; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2425 | #endif /* USE_ITT_BUILD */ |
Alexandre Ganea | 09d984a | 2024-01-23 12:03:12 -0500 | [diff] [blame] | 2426 | #ifdef KMP_DEBUG |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2427 | if (team) |
Alexandre Ganea | 09d984a | 2024-01-23 12:03:12 -0500 | [diff] [blame] | 2428 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid, |
| 2429 | (team != NULL) ? team->t.t_id : -1, tid)); |
| 2430 | #endif |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2431 | // th_team pointer only valid for primary thread here |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2432 | if (KMP_MASTER_TID(tid)) { |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2433 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2434 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { |
| 2435 | // Create itt barrier object |
| 2436 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); |
| 2437 | __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing |
| 2438 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2439 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
| 2440 | |
| 2441 | #ifdef KMP_DEBUG |
Hansang Bae | a3918ee | 2021-03-11 17:34:06 -0600 | [diff] [blame] | 2442 | KMP_DEBUG_ASSERT(team); |
Ed Maste | e632f14 | 2017-07-07 21:06:05 +0000 | [diff] [blame] | 2443 | kmp_info_t **other_threads = team->t.t_threads; |
| 2444 | int i; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2445 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2446 | // Verify state |
| 2447 | KMP_MB(); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2448 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2449 | for (i = 1; i < team->t.t_nproc; ++i) { |
| 2450 | KA_TRACE(500, |
| 2451 | ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " |
| 2452 | "== %u.\n", |
| 2453 | gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, |
| 2454 | team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, |
| 2455 | other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); |
| 2456 | KMP_DEBUG_ASSERT( |
| 2457 | (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & |
| 2458 | ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE); |
| 2459 | KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); |
| 2460 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2461 | #endif |
| 2462 | |
Jonathan Peyton | a64c7e6 | 2024-05-07 08:41:51 -0500 | [diff] [blame] | 2463 | if (__kmp_tasking_mode != tskm_immediate_exec) |
| 2464 | __kmp_task_team_setup(this_thr, team); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2465 | |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2466 | /* The primary thread may have changed its blocktime between join barrier |
| 2467 | and fork barrier. Copy the blocktime info to the thread, where |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2468 | __kmp_wait_template() can access it when the team struct is not |
| 2469 | guaranteed to exist. */ |
| 2470 | // See note about the corresponding code in __kmp_join_barrier() being |
| 2471 | // performance-critical |
| 2472 | if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { |
| 2473 | #if KMP_USE_MONITOR |
| 2474 | this_thr->th.th_team_bt_intervals = |
| 2475 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; |
| 2476 | this_thr->th.th_team_bt_set = |
| 2477 | team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; |
| 2478 | #else |
Jonathan Peyton | 52430c6 | 2017-09-05 15:45:48 +0000 | [diff] [blame] | 2479 | this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2480 | #endif |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2481 | } |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2482 | } // primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2483 | |
| 2484 | switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { |
Terry Wilmarth | 7043950 | 2021-07-15 10:28:47 -0500 | [diff] [blame] | 2485 | case bp_dist_bar: { |
| 2486 | __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2487 | TRUE USE_ITT_BUILD_ARG(NULL)); |
| 2488 | break; |
| 2489 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2490 | case bp_hyper_bar: { |
| 2491 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); |
| 2492 | __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2493 | TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2494 | break; |
| 2495 | } |
| 2496 | case bp_hierarchical_bar: { |
| 2497 | __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2498 | TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2499 | break; |
| 2500 | } |
| 2501 | case bp_tree_bar: { |
| 2502 | KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); |
| 2503 | __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2504 | TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2505 | break; |
| 2506 | } |
| 2507 | default: { |
| 2508 | __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, |
| 2509 | TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); |
| 2510 | } |
| 2511 | } |
| 2512 | |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2513 | #if OMPT_SUPPORT |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2514 | ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2515 | if (ompt_enabled.enabled && |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2516 | (ompt_state == ompt_state_wait_barrier_teams || |
| 2517 | ompt_state == ompt_state_wait_barrier_implicit_parallel)) { |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2518 | int ds_tid = this_thr->th.th_info.ds.ds_tid; |
| 2519 | ompt_data_t *task_data = (team) |
| 2520 | ? OMPT_CUR_TASK_DATA(this_thr) |
| 2521 | : &(this_thr->th.ompt_thread_info.task_data); |
Joachim Protze | 96ae997 | 2018-12-18 08:52:30 +0000 | [diff] [blame] | 2522 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2523 | #if OMPT_OPTIONAL |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2524 | void *codeptr = NULL; |
| 2525 | if (KMP_MASTER_TID(ds_tid) && |
| 2526 | (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || |
| 2527 | ompt_callbacks.ompt_callback(ompt_callback_sync_region))) |
Hansang Bae | a3918ee | 2021-03-11 17:34:06 -0600 | [diff] [blame] | 2528 | codeptr = team ? team->t.ompt_team_info.master_return_address : NULL; |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2529 | ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel; |
| 2530 | if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) |
| 2531 | sync_kind = ompt_sync_region_barrier_teams; |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2532 | if (ompt_enabled.ompt_callback_sync_region_wait) { |
| 2533 | ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2534 | sync_kind, ompt_scope_end, NULL, task_data, codeptr); |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2535 | } |
| 2536 | if (ompt_enabled.ompt_callback_sync_region) { |
| 2537 | ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
Hansang Bae | 3a33e25 | 2024-07-16 09:52:20 -0500 | [diff] [blame] | 2538 | sync_kind, ompt_scope_end, NULL, task_data, codeptr); |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2539 | } |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2540 | #endif |
Jonas Hahnfeld | 6999766 | 2018-02-23 16:46:25 +0000 | [diff] [blame] | 2541 | if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { |
| 2542 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( |
Shilei Tian | d0678fa | 2021-02-20 12:46:04 -0500 | [diff] [blame] | 2543 | ompt_scope_end, NULL, task_data, 0, ds_tid, |
| 2544 | ompt_task_implicit); // TODO: Can this be ompt_task_initial? |
Joachim Protze | 6caf935 | 2017-11-01 10:08:30 +0000 | [diff] [blame] | 2545 | } |
| 2546 | } |
| 2547 | #endif |
| 2548 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2549 | // Early exit for reaping threads releasing forkjoin barrier |
| 2550 | if (TCR_4(__kmp_global.g.g_done)) { |
| 2551 | this_thr->th.th_task_team = NULL; |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2552 | |
| 2553 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 2554 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2555 | if (!KMP_MASTER_TID(tid)) { |
| 2556 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 2557 | if (itt_sync_obj) |
| 2558 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); |
| 2559 | } |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2560 | } |
| 2561 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2562 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); |
| 2563 | return; |
| 2564 | } |
| 2565 | |
| 2566 | /* We can now assume that a valid team structure has been allocated by the |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2567 | primary thread and propagated to all worker threads. The current thread, |
| 2568 | however, may not be part of the team, so we can't blindly assume that the |
| 2569 | team pointer is non-null. */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2570 | team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); |
| 2571 | KMP_DEBUG_ASSERT(team != NULL); |
| 2572 | tid = __kmp_tid_from_gtid(gtid); |
| 2573 | |
| 2574 | #if KMP_BARRIER_ICV_PULL |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2575 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in |
| 2576 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2577 | implicit task has this data before this function is called. We cannot |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2578 | modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's |
| 2579 | thread struct, because it is not always the case that the threads arrays |
| 2580 | have been allocated when __kmp_fork_call() is executed. */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2581 | { |
| 2582 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2583 | if (!KMP_MASTER_TID(tid)) { // primary thread already has ICVs |
| 2584 | // Copy the initial ICVs from the primary thread's thread struct to the |
| 2585 | // implicit task for this tid. |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2586 | KA_TRACE(10, |
| 2587 | ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); |
| 2588 | __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, |
| 2589 | tid, FALSE); |
| 2590 | copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, |
| 2591 | &team->t.t_threads[0] |
| 2592 | ->th.th_bar[bs_forkjoin_barrier] |
| 2593 | .bb.th_fixed_icvs); |
| 2594 | } |
| 2595 | } |
| 2596 | #endif // KMP_BARRIER_ICV_PULL |
| 2597 | |
| 2598 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
| 2599 | __kmp_task_team_sync(this_thr, team); |
| 2600 | } |
| 2601 | |
Jonathan Peyton | 4d794cd | 2019-07-12 21:45:36 +0000 | [diff] [blame] | 2602 | #if KMP_AFFINITY_SUPPORTED |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2603 | kmp_proc_bind_t proc_bind = team->t.t_proc_bind; |
| 2604 | if (proc_bind == proc_bind_intel) { |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2605 | // Call dynamic affinity settings |
Jonathan Peyton | f0e7a63 | 2022-08-01 17:03:27 -0500 | [diff] [blame] | 2606 | if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) { |
Jonathan Peyton | 2b8f484 | 2018-09-26 20:43:23 +0000 | [diff] [blame] | 2607 | __kmp_balanced_affinity(this_thr, team->t.t_nproc); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2608 | } |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2609 | } else if (proc_bind != proc_bind_false) { |
| 2610 | if (this_thr->th.th_new_place == this_thr->th.th_current_place) { |
| 2611 | KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", |
| 2612 | __kmp_gtid_from_thread(this_thr), |
| 2613 | this_thr->th.th_current_place)); |
| 2614 | } else { |
Jonathan Peyton | f32212d | 2023-07-31 13:46:44 -0500 | [diff] [blame] | 2615 | __kmp_affinity_bind_place(gtid); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2616 | } |
| 2617 | } |
Jonathan Peyton | 4d794cd | 2019-07-12 21:45:36 +0000 | [diff] [blame] | 2618 | #endif // KMP_AFFINITY_SUPPORTED |
Jonathan Peyton | 211f764 | 2018-12-13 23:14:24 +0000 | [diff] [blame] | 2619 | // Perform the display affinity functionality |
| 2620 | if (__kmp_display_affinity) { |
| 2621 | if (team->t.t_display_affinity |
| 2622 | #if KMP_AFFINITY_SUPPORTED |
Jonathan Peyton | f0e7a63 | 2022-08-01 17:03:27 -0500 | [diff] [blame] | 2623 | || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) |
Jonathan Peyton | 211f764 | 2018-12-13 23:14:24 +0000 | [diff] [blame] | 2624 | #endif |
Shilei Tian | d0678fa | 2021-02-20 12:46:04 -0500 | [diff] [blame] | 2625 | ) { |
Jonathan Peyton | 211f764 | 2018-12-13 23:14:24 +0000 | [diff] [blame] | 2626 | // NULL means use the affinity-format-var ICV |
| 2627 | __kmp_aux_display_affinity(gtid, NULL); |
| 2628 | this_thr->th.th_prev_num_threads = team->t.t_nproc; |
| 2629 | this_thr->th.th_prev_level = team->t.t_level; |
| 2630 | } |
| 2631 | } |
Jonathan Peyton | e023a4c | 2018-09-07 18:25:49 +0000 | [diff] [blame] | 2632 | if (!KMP_MASTER_TID(tid)) |
| 2633 | KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator); |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2634 | |
| 2635 | #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| 2636 | if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { |
| 2637 | if (!KMP_MASTER_TID(tid)) { |
| 2638 | // Get correct barrier object |
| 2639 | itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); |
| 2640 | __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired |
| 2641 | } // (prepare called inside barrier_release) |
| 2642 | } |
| 2643 | #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2644 | KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, |
| 2645 | team->t.t_id, tid)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2646 | } |
| 2647 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2648 | void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, |
| 2649 | kmp_internal_control_t *new_icvs, ident_t *loc) { |
| 2650 | KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2651 | |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2652 | KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); |
| 2653 | KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2654 | |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2655 | /* Primary thread's copy of the ICVs was set up on the implicit taskdata in |
| 2656 | __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2657 | implicit task has this data before this function is called. */ |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2658 | #if KMP_BARRIER_ICV_PULL |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2659 | /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which |
| 2660 | remains untouched), where all of the worker threads can access them and |
| 2661 | make their own copies after the barrier. */ |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2662 | KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be |
| 2663 | // allocated at this point |
| 2664 | copy_icvs( |
| 2665 | &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, |
| 2666 | new_icvs); |
| 2667 | KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, |
| 2668 | team->t.t_threads[0], team)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2669 | #elif KMP_BARRIER_ICV_PUSH |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2670 | // The ICVs will be propagated in the fork barrier, so nothing needs to be |
| 2671 | // done here. |
| 2672 | KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, |
| 2673 | team->t.t_threads[0], team)); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2674 | #else |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2675 | // Copy the ICVs to each of the non-primary threads. This takes O(nthreads) |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2676 | // time. |
| 2677 | ngo_load(new_icvs); |
| 2678 | KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be |
| 2679 | // allocated at this point |
tlwilmar | acdce61 | 2021-03-05 09:21:39 -0600 | [diff] [blame] | 2680 | for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread |
Jonathan Peyton | a770c6d | 2017-05-12 18:01:32 +0000 | [diff] [blame] | 2681 | // TODO: GEH - pass in better source location info since usually NULL here |
| 2682 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", |
| 2683 | f, team->t.t_threads[f], team)); |
| 2684 | __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE); |
| 2685 | ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); |
| 2686 | KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", |
| 2687 | f, team->t.t_threads[f], team)); |
| 2688 | } |
| 2689 | ngo_sync(); |
Jim Cownie | 5f037e1 | 2014-10-07 16:25:50 +0000 | [diff] [blame] | 2690 | #endif // KMP_BARRIER_ICV_PULL |
| 2691 | } |