| #if USE_ITT_BUILD |
| /* |
| * kmp_itt.inl -- Inline functions of ITT Notify. |
| */ |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // Inline function definitions. This file should be included into kmp_itt.h file |
| // for production build (to let compiler inline functions) or into kmp_itt.c |
| // file for debug build (to reduce the number of files to recompile and save |
| // build time). |
| |
| #include "kmp.h" |
| #include "kmp_str.h" |
| |
| #if KMP_ITT_DEBUG |
| extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; |
| #define KMP_ITT_DEBUG_LOCK() \ |
| { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); } |
| #define KMP_ITT_DEBUG_PRINT(...) \ |
| { \ |
| fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \ |
| fprintf(stderr, __VA_ARGS__); \ |
| fflush(stderr); \ |
| __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \ |
| } |
| #else |
| #define KMP_ITT_DEBUG_LOCK() |
| #define KMP_ITT_DEBUG_PRINT(...) |
| #endif // KMP_ITT_DEBUG |
| |
| // Ensure that the functions are static if they're supposed to be being inlined. |
| // Otherwise they cannot be used in more than one file, since there will be |
| // multiple definitions. |
| #if KMP_DEBUG |
| #define LINKAGE |
| #else |
| #define LINKAGE static inline |
| #endif |
| |
| // ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses |
| // this API to support user-defined synchronization primitives, but does not use |
| // ZCA; it would be safe to turn this off until wider support becomes available. |
| #if USE_ITT_ZCA |
| #ifdef __INTEL_COMPILER |
| #if __INTEL_COMPILER >= 1200 |
| #undef __itt_sync_acquired |
| #undef __itt_sync_releasing |
| #define __itt_sync_acquired(addr) \ |
| __notify_zc_intrinsic((char *)"sync_acquired", addr) |
| #define __itt_sync_releasing(addr) \ |
| __notify_intrinsic((char *)"sync_releasing", addr) |
| #endif |
| #endif |
| #endif |
| |
| static kmp_bootstrap_lock_t metadata_lock = |
| KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock); |
| |
| #if USE_ITT_NOTIFY |
| LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) { |
| return ((addr >> 6) ^ (addr >> 2)) % hsize; |
| } |
// Find (or lazily create) the hash entry keyed by (loc, team_size).
// Returns NULL when the table already holds KMP_MAX_FRAME_DOMAINS entries.
// Lock-free: concurrent inserts are resolved by CAS on the bucket head, so
// multiple threads may safely report frames for new locations at once.
LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread,
                                              kmp_itthash_t *h, ident_t *loc,
                                              int team_size) {
  kmp_itthash_entry_t *entry;
  size_t bucket = __kmp_itthash_hash((kmp_intptr_t)loc, KMP_MAX_FRAME_DOMAINS);
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->loc == loc && entry->team_size == team_size)
      break;

  if (entry == NULL) {
    // two foreign threads could report frames concurrently
    int cnt = KMP_TEST_THEN_INC32(&h->count);
    if (cnt >= KMP_MAX_FRAME_DOMAINS) {
      KMP_TEST_THEN_DEC32(&h->count); // revert the count
      return entry; // too many entries (entry is NULL here)
    }
    // create new entry
    entry = (kmp_itthash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_itthash_entry_t));
    entry->loc = loc;
    entry->team_size = team_size;
    entry->d = NULL; // the ITT domain is created lazily by the caller
    entry->next_in_bucket = h->buckets[bucket];
    // Publish at the bucket head; retry if another thread won the race and
    // changed the head in the meantime.
    while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket],
                                      entry->next_in_bucket, entry)) {
      KMP_CPU_PAUSE();
      entry->next_in_bucket = h->buckets[bucket];
    }
  }
#if KMP_DEBUG
  else {
    // check the contents of the location info is unique
    KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource);
  }
#endif
  return entry;
}
| #endif |
| |
| /* Parallel region reporting. |
| * __kmp_itt_region_forking should be called by primary thread of a team. |
| Exact moment of call does not matter, but it should be completed before any |
| thread of this team calls __kmp_itt_region_starting. |
| * __kmp_itt_region_starting should be called by each thread of a team just |
| before entering parallel region body. |
| * __kmp_itt_region_finished should be called by each thread of a team right |
| after returning from parallel region body. |
| * __kmp_itt_region_joined should be called by primary thread of a team, after |
| all threads called __kmp_itt_region_finished. |
| |
| Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can |
| execute some more user code -- such a thread can execute tasks. |
| |
| Note: The overhead of logging region_starting and region_finished in each |
| thread is too large, so these calls are not used. */ |
| |
// Begin an ITT frame for a forked parallel region. Called by the primary
// thread of the team; only outermost regions (active level 1) with location
// info are reported. On first sight of a (location, team_size) pair a domain
// named "<func>$omp$parallel:<team_size>@<file>:<line>:<col>" is created,
// plus a matching barrier domain when `barriers` is nonzero.
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  ident_t *loc = th->th.th_ident;
  if (!loc) {
    // no sense to report a region without location info
    return;
  }
  kmp_itthash_entry *e;
  e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
  if (e == NULL)
    return; // too many entries in the hash
  if (e->d == NULL) {
    // Transform compiler-generated region location into the format
    // that the tools more or less standardized on:
    // "<func>$omp$parallel@[file:]<line>[:<col>]"
    char *buff = NULL;
    kmp_str_loc_t str_loc =
        __kmp_str_loc_init(loc->psource, /* init_fname */ false);
    buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                            team_size, str_loc.file, str_loc.line, str_loc.col);

    // Suppress memory-checker reports originating inside the ITT runtime.
    __itt_suppress_push(__itt_suppress_memory_errors);
    e->d = __itt_domain_create(buff);
    KMP_ASSERT(e->d != NULL);
    __itt_suppress_pop();

    __kmp_str_free(&buff);
    if (barriers) {
      // Also pre-create the domain used later for barrier frame reporting
      // (team_size is not part of the barrier key, hence 0).
      kmp_itthash_entry *e;
      e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
      if (e != NULL) {
        KMP_DEBUG_ASSERT(e->d == NULL);
        char *buff = NULL;
        buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                str_loc.file, str_loc.line);
        __itt_suppress_push(__itt_suppress_memory_errors);
        e->d = __itt_domain_create(buff);
        KMP_ASSERT(e->d != NULL);
        __itt_suppress_pop();
        __kmp_str_free(&buff);
      }
    }
    __kmp_str_loc_free(&str_loc);
  }
  __itt_frame_begin_v3(e->d, NULL);
  KMP_ITT_DEBUG_LOCK();
  KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
                      loc);
#endif
} // __kmp_itt_region_forking
| |
| // ----------------------------------------------------------------------------- |
// Submit a completed ITT frame [begin, end].
//  * region != 0: report a parallel-region frame; region == 2 marks a
//    serialized region (counted as one extra nesting level). Only outermost
//    regions are reported.
//  * region == 0: report a barrier frame; `imbalance` selects the
//    "barrier-imbalance" domain name (which also encodes team_size).
// Domains are created lazily and cached in the location hash tables.
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (!loc) {
    // no sense to report a region without location info
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before.
    kmp_itthash_entry *e;
    e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
    if (e == NULL)
      return; // too many entries in the hash
    if (e->d == NULL) { // new entry, need to calculate domain
      // Transform compiler-generated region location into the format
      // that the tools more or less standardized on:
      // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
      char *buff = NULL;
      kmp_str_loc_t str_loc =
          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
      buff =
          __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                           team_size, str_loc.file, str_loc.line, str_loc.col);
      __itt_suppress_push(__itt_suppress_memory_errors);
      e->d = __itt_domain_create(buff);
      KMP_ASSERT(e->d != NULL);
      __itt_suppress_pop();

      __kmp_str_free(&buff);
      __kmp_str_loc_free(&str_loc);
    }
    __itt_frame_submit_v3(e->d, NULL, begin, end);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, e->d, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    kmp_itthash_entry *e;
    e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
    if (e == NULL)
      return; // too many entries in the hash
    if (e->d == NULL) { // new entry, need to calculate domain
      // Transform compiler-generated region location into the format
      // that the tools more or less standardized on:
      // "<func>$omp$frame@[file:]<line>[:<col>]"
      kmp_str_loc_t str_loc =
          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
      char *buff = NULL;
      if (imbalance) {
        buff =
            __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func,
                             team_size, str_loc.file, str_loc.line);
      } else {
        buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                str_loc.file, str_loc.line);
      }
      __itt_suppress_push(__itt_suppress_memory_errors);
      e->d = __itt_domain_create(buff);
      KMP_ASSERT(e->d != NULL);
      __itt_suppress_pop();
      __kmp_str_free(&buff);
      __kmp_str_loc_free(&str_loc);
    }
    __itt_frame_submit_v3(e->d, NULL, begin, end);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid,
        e->d, loc, begin, end);
  }
#endif
} // __kmp_itt_frame_submit
| |
| // ----------------------------------------------------------------------------- |
// Attach imbalance metadata (begin/end timestamps, imbalance, reduction flag)
// to the shared "OMP Metadata" domain as a u64[4] record.
// Note: gtid is currently unused by the body.
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  // Lazily create the shared domain and string handles; double-checked
  // locking keeps the common path lock-free.
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance
| |
| // ----------------------------------------------------------------------------- |
// Attach loop metadata (source line/col, schedule, iteration count, chunk)
// to the shared "OMP Metadata" domain as a u64[5] record.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  // Lazily create the shared domain and string handles (double-checked lock).
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  KMP_DEBUG_ASSERT(loc->psource);
  kmp_uint64 loop_data[5];
  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  loop_data[0] = line;
  loop_data[1] = col;
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop
| |
| // ----------------------------------------------------------------------------- |
| LINKAGE void __kmp_itt_metadata_single(ident_t *loc) { |
| #if USE_ITT_NOTIFY |
| if (metadata_domain == NULL) { |
| __kmp_acquire_bootstrap_lock(&metadata_lock); |
| if (metadata_domain == NULL) { |
| __itt_suppress_push(__itt_suppress_memory_errors); |
| metadata_domain = __itt_domain_create("OMP Metadata"); |
| string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); |
| string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); |
| string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); |
| __itt_suppress_pop(); |
| } |
| __kmp_release_bootstrap_lock(&metadata_lock); |
| } |
| |
| int line, col; |
| __kmp_str_loc_numbers(loc->psource, &line, &col); |
| kmp_uint64 single_data[2]; |
| single_data[0] = line; |
| single_data[1] = col; |
| |
| __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl, |
| __itt_metadata_u64, 2, single_data); |
| #endif |
| } // __kmp_itt_metadata_single |
| |
| // ----------------------------------------------------------------------------- |
// Intentionally empty: per-thread region-start logging is too expensive, so
// these calls are disabled (the stub is kept so call sites stay uniform).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting
| |
| // ----------------------------------------------------------------------------- |
// Intentionally empty: per-thread region-finish logging is too expensive, so
// these calls are disabled (the stub is kept so call sites stay uniform).
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished
| |
| // ---------------------------------------------------------------------------- |
// End the ITT frame for a parallel region. Called by the primary thread after
// all team threads have finished; mirrors __kmp_itt_region_forking, so the
// hash lookup is expected to yield the entry created at fork time.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  ident_t *loc = th->th.th_ident;
  if (loc) {
    kmp_itthash_entry *e = __kmp_itthash_find(th, &__kmp_itt_region_domains,
                                              loc, th->th.th_team_nproc);
    if (e == NULL)
      return; // too many entries in the hash
    KMP_DEBUG_ASSERT(e->d); // the domain must have been created at fork
    KMP_ITT_DEBUG_LOCK();
    __itt_frame_end_v3(e->d, NULL);
    KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
                        loc);
  }
#endif
} // __kmp_itt_region_joined
| |
| /* Barriers reporting. |
| |
| A barrier consists of two phases: |
| 1. Gather -- primary thread waits for all worker threads to arrive; each |
| worker thread registers arrival and goes further. |
| 2. Release -- each worker thread waits until primary thread lets it go; |
| primary thread lets worker threads go. |
| |
| Function should be called by each thread: |
| * __kmp_itt_barrier_starting() -- before arriving to the gather phase. |
| * __kmp_itt_barrier_middle() -- between gather and release phases. |
| * __kmp_itt_barrier_finished() -- after release phase. |
| |
| Note: Call __kmp_itt_barrier_object() before call to |
| __kmp_itt_barrier_starting() and save result in local variable. |
| __kmp_itt_barrier_object(), being called too late (e. g. after gather phase) |
| would return itt sync object for the next barrier! |
| |
| ITT need an address (void *) to be specified as a sync object. OpenMP RTL |
| does not have barrier object or barrier data structure. Barrier is just a |
| counter in team and thread structures. We could use an address of team |
| structure as a barrier sync object, but ITT wants different objects for |
   different barriers (even within the same team). So let us use team address
| as barrier sync object for the first barrier, then increase it by one for the |
| next barrier, and so on (but wrap it not to use addresses outside of team |
| structure). */ |
| |
// Compute the ITT sync-object address for the current (or, with delta == -1,
// previous) barrier of type `bt`, optionally registering its name via
// __itt_sync_create. The address is the team address plus a rotating offset
// (barrier counter mod the number of available slots) plus the barrier type,
// so distinct barriers of the same team map to distinct addresses that stay
// inside the kmp_team_t object.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        (kmp_uintptr_t)(team) +
        (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
            bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        // impl != 0 implies loc != NULL, so dereferencing loc below is safe.
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we cannot read thr->th.th_ident,
        // because it contains location of last passed construct (while join
        // barrier is not such one). Use th_ident of primary thread instead --
        // __kmp_join_call() called by the primary thread saves location.
        //
        // AC: cannot read from primary thread because __kmp_join_call may not
        // be called yet, so we read the location from team. This is the
        // same location. Team is valid on entry to join barrier where this
        // happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
| |
| // ----------------------------------------------------------------------------- |
// Entering the gather phase. Workers first report "releasing" (they let the
// previous release phase go), then every thread reports "prepare" to wait.
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  int is_primary = KMP_MASTER_GTID(gtid);
  if (!is_primary) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
| |
| // ----------------------------------------------------------------------------- |
// Between the gather and release phases. Only the primary thread reports:
// it has observed all arrivals ("acquired") and is about to let the workers
// go ("releasing"). Workers have nothing to report here.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid))
    return;
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
| |
| // ----------------------------------------------------------------------------- |
// After the release phase. Each worker reports that it has been let through
// ("acquired"); the primary thread already reported in barrier_middle.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished
| |
| /* Taskwait reporting. |
| ITT need an address (void *) to be specified as a sync object. OpenMP RTL |
| does not have taskwait structure, so we need to construct something. */ |
| |
// Build a pseudo sync-object address for the current taskwait: base address
// of the current task descriptor plus (taskwait counter mod descriptor size),
// so successive taskwaits of the same task get distinct addresses that stay
// within the descriptor. Returns NULL when ITT sync tracing is inactive.
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object
| |
// Name the taskwait sync object after its source location (saved in the task
// descriptor) and report that the thread is about to wait on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *td = thr->th.th_current_task;
  ident_t const *loc = td->td_taskwait_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
| |
// The taskwait is over: report "acquired" and destroy the temporary sync
// object (its address may be reused by a later taskwait).
// Note: gtid is currently unused by the body.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
| |
| /* Task reporting. |
   Only those tasks are reported which are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the function must be the
   barrier or taskwait the threads are waiting at. */
| |
// A task is about to run on a thread spinning at a barrier/taskwait: cancel
// the pending "prepare" on the sync object, since the wait is interrupted.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
| |
| // ----------------------------------------------------------------------------- |
// The task finished; the thread resumes waiting, so re-issue "prepare" on the
// sync object it was spinning at.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
| |
| /* Lock reporting. |
| * __kmp_itt_lock_creating( lock ) should be called *before* the first lock |
| operation (set/unset). It is not a real event shown to the user but just |
| setting a name for synchronization object. `lock' is an address of sync |
| object, the same address should be used in all subsequent calls. |
| * __kmp_itt_lock_acquiring() should be called before setting the lock. |
| * __kmp_itt_lock_acquired() should be called after setting the lock. |
   * __kmp_itt_lock_releasing() should be called before unsetting the lock.
| * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting |
| for the lock. |
| * __kmp_itt_lock_destroyed( lock ) should be called after the last lock |
| operation. After __kmp_itt_lock_destroyed() all the references to the same |
| address will be considered as another sync object, not related with the |
| original one. */ |
| |
| #if KMP_USE_DYNAMIC_LOCK |
| // Takes location information directly |
| __kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type, |
| const ident_t *loc) { |
| #if USE_ITT_NOTIFY |
| if (__itt_sync_create_ptr) { |
| char const *src = (loc == NULL ? NULL : loc->psource); |
| KMP_ITT_DEBUG_LOCK(); |
| __itt_sync_create(lock, type, src, 0); |
| KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, |
| src); |
| } |
| #endif |
| } |
| #else // KMP_USE_DYNAMIC_LOCK |
| // Internal guts -- common code for locks and critical sections, do not call |
| // directly. |
| __kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) { |
| #if USE_ITT_NOTIFY |
| if (__itt_sync_create_ptr) { |
| ident_t const *loc = NULL; |
| if (__kmp_get_user_lock_location_ != NULL) |
| loc = __kmp_get_user_lock_location_((lock)); |
| char const *src = (loc == NULL ? NULL : loc->psource); |
| KMP_ITT_DEBUG_LOCK(); |
| __itt_sync_create(lock, type, src, 0); |
| KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, |
| src); |
| } |
| #endif |
| } // ___kmp_itt_lock_init |
| #endif // KMP_USE_DYNAMIC_LOCK |
| |
| // Internal guts -- common code for locks and critical sections, do not call |
| // directly. |
// Destroy the ITT sync object for the lock. `type` is currently unused here;
// it is kept so the signature mirrors ___kmp_itt_lock_init.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini
| |
| // ----------------------------------------------------------------------------- |
#if KMP_USE_DYNAMIC_LOCK
// Name a user lock for ITT; location info is supplied by the caller.
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
// Name a user lock for ITT; location info is looked up from the lock itself.
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif
| |
| void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) { |
| #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY |
| // postpone lock object access |
| if (__itt_sync_prepare_ptr) { |
| if (KMP_EXTRACT_D_TAG(lock) == 0) { |
| kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
| __itt_sync_prepare(ilk->lock); |
| } else { |
| __itt_sync_prepare(lock); |
| } |
| } |
| #else |
| __itt_sync_prepare(lock); |
| #endif |
| } // __kmp_itt_lock_acquiring |
| |
| void __kmp_itt_lock_acquired(kmp_user_lock_p lock) { |
| #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY |
| // postpone lock object access |
| if (__itt_sync_acquired_ptr) { |
| if (KMP_EXTRACT_D_TAG(lock) == 0) { |
| kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
| __itt_sync_acquired(ilk->lock); |
| } else { |
| __itt_sync_acquired(lock); |
| } |
| } |
| #else |
| __itt_sync_acquired(lock); |
| #endif |
| } // __kmp_itt_lock_acquired |
| |
| void __kmp_itt_lock_releasing(kmp_user_lock_p lock) { |
| #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY |
| if (__itt_sync_releasing_ptr) { |
| if (KMP_EXTRACT_D_TAG(lock) == 0) { |
| kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
| __itt_sync_releasing(ilk->lock); |
| } else { |
| __itt_sync_releasing(lock); |
| } |
| } |
| #else |
| __itt_sync_releasing(lock); |
| #endif |
| } // __kmp_itt_lock_releasing |
| |
| void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) { |
| #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY |
| if (__itt_sync_cancel_ptr) { |
| if (KMP_EXTRACT_D_TAG(lock) == 0) { |
| kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
| __itt_sync_cancel(ilk->lock); |
| } else { |
| __itt_sync_cancel(lock); |
| } |
| } |
| #else |
| __itt_sync_cancel(lock); |
| #endif |
| } // __kmp_itt_lock_cancelled |
| |
// Retire the lock's ITT sync object after the last lock operation.
void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed
| |
| /* Critical reporting. |
| Critical sections are treated exactly as locks (but have different object |
| type). */ |
#if KMP_USE_DYNAMIC_LOCK
// Name a critical-section lock for ITT; location supplied by the caller.
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
// Name a critical-section lock for ITT; location looked up from the lock.
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif
| |
// About to enter the critical section: report "prepare" on the lock address.
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring
| |
// Entered the critical section: report "acquired" on the lock address.
void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired
| |
// Leaving the critical section: report "releasing" on the lock address.
void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing
| |
// Retire the critical section's ITT sync object.
void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed
| |
| /* Single reporting. */ |
| |
// Mark the start of a "single" region: create a per-location named mark
// ("OMP Single-<psource>"), remember its id in the thread, and raise it.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    // Save the mark id so __kmp_itt_single_end can switch it off.
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
| |
// Mark the end of a "single" region by switching off the mark raised in
// __kmp_itt_single_start (its id was saved in the thread descriptor).
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
| |
| /* Ordered reporting. |
| * __kmp_itt_ordered_init is called by each thread *before* first using sync |
| object. ITT team would like it to be called once, but it requires extra |
| synchronization. |
| * __kmp_itt_ordered_prep is called when thread is going to enter ordered |
| section (before synchronization). |
| * __kmp_itt_ordered_start is called just before entering user code (after |
| synchronization). |
| * __kmp_itt_ordered_end is called after returning from user code. |
| |
| Sync object is th->th.th_dispatch->th_dispatch_sh_current. |
| Events are not generated in case of serialized team. */ |
| |
// Name the ordered-section sync object (th_dispatch_sh_current) for ITT.
// Called by each thread before the first use of the object.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = thr->th.th_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;
  __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered",
                    src, 0);
#endif
} // __kmp_itt_ordered_init
| |
// Thread is about to wait for its turn in an ordered section ("prepare").
// No events are generated for serialized teams.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
| |
// Thread got its turn and enters the ordered-section body ("acquired").
// No events are generated for serialized teams.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
| |
// Thread leaves the ordered-section body ("releasing").
// No events are generated for serialized teams.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
| |
| /* Threads reporting. */ |
| |
// Tell ITT to ignore the calling thread in analysis results.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
| |
// Give the calling thread a human-readable ITT name based on its gtid.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr)
    return;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  if (KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&buf, "OMP Primary Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&buf, "OMP Worker Thread #%d", gtid);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(buf.str, buf.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", buf.str);
  __kmp_str_buf_free(&buf);
#endif
} // __kmp_itt_thread_name
| |
| /* System object reporting. |
| ITT catches operations with system sync objects (like Windows* OS on IA-32 |
| architecture API critical sections and events). We only need to specify |
| name ("OMP Scheduler") for the object to let ITT know it is an object used |
| by OpenMP RTL for internal purposes. */ |
| |
// Label an RTL-internal system sync object as "OMP Scheduler" so tools do not
// attribute it to user code; `name` is reported as the source string.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
| |
| /* Stack stitching api. |
| Primary thread calls "create" and put the stitching id into team structure. |
| Workers read the stitching id and call "enter" / "leave" api. |
| Primary thread calls "destroy" at the end of the parallel region. */ |
| |
// Create a stack-stitching id (the primary thread stores it in the team).
// Returns NULL when the stitching API is inactive; the trailing return covers
// builds where USE_ITT_NOTIFY is off and the whole #if block compiles away.
__itt_caller __kmp_itt_stack_caller_create() {
#if USE_ITT_NOTIFY
  if (!__itt_stack_caller_create_ptr)
    return NULL;
  KMP_ITT_DEBUG_LOCK();
  __itt_caller id = __itt_stack_caller_create();
  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
  return id;
#endif
  return NULL;
}
| |
| void __kmp_itt_stack_caller_destroy(__itt_caller id) { |
| #if USE_ITT_NOTIFY |
| if (__itt_stack_caller_destroy_ptr) { |
| KMP_ITT_DEBUG_LOCK(); |
| __itt_stack_caller_destroy(id); |
| KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); |
| } |
| #endif |
| } |
| |
| void __kmp_itt_stack_callee_enter(__itt_caller id) { |
| #if USE_ITT_NOTIFY |
| if (__itt_stack_callee_enter_ptr) { |
| KMP_ITT_DEBUG_LOCK(); |
| __itt_stack_callee_enter(id); |
| KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); |
| } |
| #endif |
| } |
| |
| void __kmp_itt_stack_callee_leave(__itt_caller id) { |
| #if USE_ITT_NOTIFY |
| if (__itt_stack_callee_leave_ptr) { |
| KMP_ITT_DEBUG_LOCK(); |
| __itt_stack_callee_leave(id); |
| KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); |
| } |
| #endif |
| } |
| |
| #endif /* USE_ITT_BUILD */ |