| /* |
| * kmp_runtime.c -- KPTS runtime support library |
| * $Revision: 43473 $ |
| * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $ |
| */ |
| |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is dual licensed under the MIT and the University of Illinois Open |
| // Source Licenses. See LICENSE.txt for details. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| |
| #include "kmp.h" |
| #include "kmp_atomic.h" |
| #include "kmp_wrapper_getpid.h" |
| #include "kmp_environment.h" |
| #include "kmp_itt.h" |
| #include "kmp_str.h" |
| #include "kmp_settings.h" |
| #include "kmp_i18n.h" |
| #include "kmp_io.h" |
| #include "kmp_error.h" |
| #include "kmp_stats.h" |
| #include "kmp_wait_release.h" |
| |
| /* these are temporary issues to be dealt with */ |
| #define KMP_USE_PRCTL 0 |
| #define KMP_USE_POOLED_ALLOC 0 |
| |
| #if KMP_OS_WINDOWS |
| #include <process.h> |
| #endif |
| |
| |
| #if defined(KMP_GOMP_COMPAT) |
| char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes"; |
| #endif /* defined(KMP_GOMP_COMPAT) */ |
| |
| char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " |
| #if OMP_40_ENABLED |
| "4.0 (201307)"; |
| #else |
| "3.1 (201107)"; |
| #endif |
| |
| #ifdef KMP_DEBUG |
| char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable"; |
| #endif /* KMP_DEBUG */ |
| |
| |
| #define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| kmp_info_t __kmp_monitor; |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* Forward declarations */ |
| |
| void __kmp_cleanup( void ); |
| |
| static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid ); |
| static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc ); |
| static void __kmp_partition_places( kmp_team_t *team ); |
| static void __kmp_do_serial_initialize( void ); |
| void __kmp_fork_barrier( int gtid, int tid ); |
| void __kmp_join_barrier( int gtid ); |
| void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc ); |
| |
| |
| #ifdef USE_LOAD_BALANCE |
| static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc ); |
| #endif |
| |
| static int __kmp_expand_threads(int nWish, int nNeed); |
| static int __kmp_unregister_root_other_thread( int gtid ); |
| static void __kmp_unregister_library( void ); // called by __kmp_internal_end() |
| static void __kmp_reap_thread( kmp_info_t * thread, int is_root ); |
| static kmp_info_t *__kmp_thread_pool_insert_pt = NULL; |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* Calculate the identifier of the current thread */ |
| /* fast (and somewhat portable) way to get unique */ |
| /* identifier of executing thread. */ |
| /* returns KMP_GTID_DNE if we haven't been assigned a gtid */ |
| |
| int |
| __kmp_get_global_thread_id( ) |
| { |
| int i; |
| kmp_info_t **other_threads; |
| size_t stack_data; |
| char *stack_addr; |
| size_t stack_size; |
| char *stack_base; |
| |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", |
| __kmp_nth, __kmp_all_nth )); |
| |
    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, this returns KMP_GTID_DNE to force serial_initialize by the
       caller. KMP_GTID_DNE must then be handled at all call sites, or else
       __kmp_init_gtid must be guaranteed, for this to work. */
| |
| if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE; |
| |
| #ifdef KMP_TDATA_GTID |
| if ( TCR_4(__kmp_gtid_mode) >= 3) { |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" )); |
| return __kmp_gtid; |
| } |
| #endif |
| if ( TCR_4(__kmp_gtid_mode) >= 2) { |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" )); |
| return __kmp_gtid_get_specific(); |
| } |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" )); |
| |
| stack_addr = (char*) & stack_data; |
| other_threads = __kmp_threads; |
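
    /* Fall back to scanning the stack extents of every registered thread: the
       address of the local variable stack_data must lie within the stack of the
       executing thread, so the thread whose [stackbase - stacksize, stackbase]
       range contains it identifies our gtid. */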
| |
| /* |
| ATT: The code below is a source of potential bugs due to unsynchronized access to |
| __kmp_threads array. For example: |
| 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL. |
| 2. Current thread is suspended by OS. |
| 3. Another thread unregisters and finishes (debug versions of free() may fill memory |
| with something like 0xEF). |
| 4. Current thread is resumed. |
| 5. Current thread reads junk from *thr. |
| TODO: Fix it. |
| --ln |
| */ |
| |
| for( i = 0 ; i < __kmp_threads_capacity ; i++ ) { |
| |
| kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); |
| if( !thr ) continue; |
| |
| stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); |
| stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); |
| |
| /* stack grows down -- search through all of the active threads */ |
| |
| if( stack_addr <= stack_base ) { |
| size_t stack_diff = stack_base - stack_addr; |
| |
| if( stack_diff <= stack_size ) { |
| /* The only way we can be closer than the allocated */ |
| /* stack size is if we are running on this thread. */ |
| KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i ); |
| return i; |
| } |
| } |
| } |
| |
| /* get specific to try and determine our gtid */ |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find " |
| "thread, using TLS\n" )); |
| i = __kmp_gtid_get_specific(); |
| |
| /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ |
| |
    /* if we haven't been assigned a gtid, return the error code */
| if( i<0 ) return i; |
| |
| /* dynamically updated stack window for uber threads to avoid get_specific call */ |
| if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) { |
| KMP_FATAL( StackOverflow, i ); |
| } |
| |
| stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; |
| if( stack_addr > stack_base ) { |
| TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); |
| TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, |
| other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base); |
| } else { |
| TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr); |
| } |
| |
| /* Reprint stack bounds for ubermaster since they have been refined */ |
| if ( __kmp_storage_map ) { |
| char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; |
| char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; |
| __kmp_print_storage_map_gtid( i, stack_beg, stack_end, |
| other_threads[i]->th.th_info.ds.ds_stacksize, |
| "th_%d stack (refinement)", i ); |
| } |
| return i; |
| } |
| |
| int |
| __kmp_get_global_thread_id_reg( ) |
| { |
| int gtid; |
| |
| if ( !__kmp_init_serial ) { |
| gtid = KMP_GTID_DNE; |
| } else |
| #ifdef KMP_TDATA_GTID |
| if ( TCR_4(__kmp_gtid_mode) >= 3 ) { |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" )); |
| gtid = __kmp_gtid; |
| } else |
| #endif |
| if ( TCR_4(__kmp_gtid_mode) >= 2 ) { |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" )); |
| gtid = __kmp_gtid_get_specific(); |
| } else { |
| KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" )); |
| gtid = __kmp_get_global_thread_id(); |
| } |
| |
| /* we must be a new uber master sibling thread */ |
| if( gtid == KMP_GTID_DNE ) { |
| KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. " |
| "Registering a new gtid.\n" )); |
| __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); |
| if( !__kmp_init_serial ) { |
| __kmp_do_serial_initialize(); |
| gtid = __kmp_gtid_get_specific(); |
| } else { |
| gtid = __kmp_register_root(FALSE); |
| } |
| __kmp_release_bootstrap_lock( &__kmp_initz_lock ); |
| /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ |
| } |
| |
| KMP_DEBUG_ASSERT( gtid >=0 ); |
| |
| return gtid; |
| } |
| |
| /* caller must hold forkjoin_lock */ |
| void |
| __kmp_check_stack_overlap( kmp_info_t *th ) |
| { |
| int f; |
| char *stack_beg = NULL; |
| char *stack_end = NULL; |
| int gtid; |
| |
| KA_TRACE(10,("__kmp_check_stack_overlap: called\n")); |
| if ( __kmp_storage_map ) { |
| stack_end = (char *) th->th.th_info.ds.ds_stackbase; |
| stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; |
| |
| gtid = __kmp_gtid_from_thread( th ); |
| |
| if (gtid == KMP_GTID_MONITOR) { |
| __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, |
| "th_%s stack (%s)", "mon", |
| ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); |
| } else { |
| __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, |
| "th_%d stack (%s)", gtid, |
| ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); |
| } |
| } |
| |
| /* No point in checking ubermaster threads since they use refinement and cannot overlap */ |
| if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th ))) |
| { |
| KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); |
| if ( stack_beg == NULL ) { |
| stack_end = (char *) th->th.th_info.ds.ds_stackbase; |
| stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; |
| } |
| |
| for( f=0 ; f < __kmp_threads_capacity ; f++ ) { |
| kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); |
| |
| if( f_th && f_th != th ) { |
| char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); |
| char *other_stack_beg = other_stack_end - |
| (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); |
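                /* Report an overlap if either end of this thread's stack falls
                   strictly inside the other thread's stack range. */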
| if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || |
| (stack_end > other_stack_beg && stack_end < other_stack_end)) { |
| |
| /* Print the other stack values before the abort */ |
| if ( __kmp_storage_map ) |
| __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, |
| (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), |
| "th_%d stack (overlapped)", |
| __kmp_gtid_from_thread( f_th ) ); |
| |
| __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); |
| } |
| } |
| } |
| } |
| KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); |
| } |
| |
| |
| /* ------------------------------------------------------------------------ */ |
| |
| /* ------------------------------------------------------------------------ */ |
| |
| void |
| __kmp_infinite_loop( void ) |
| { |
| static int done = FALSE; |
| |
| while (! done) { |
| KMP_YIELD( 1 ); |
| } |
| } |
| |
| #define MAX_MESSAGE 512 |
| |
| void |
| __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) { |
| char buffer[MAX_MESSAGE]; |
| int node; |
| va_list ap; |
| |
| va_start( ap, format); |
    snprintf( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
| __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); |
| __kmp_vprintf( kmp_err, buffer, ap ); |
| #if KMP_PRINT_DATA_PLACEMENT |
| if(gtid >= 0) { |
| if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { |
| if( __kmp_storage_map_verbose ) { |
| node = __kmp_get_host_node(p1); |
| if(node < 0) /* doesn't work, so don't try this next time */ |
| __kmp_storage_map_verbose = FALSE; |
| else { |
| char *last; |
| int lastNode; |
| int localProc = __kmp_get_cpu_from_gtid(gtid); |
| |
| p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) ); |
| p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) ); |
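                    /* p1 now points to the start of its page and p2 to the start of the
                       page holding the last byte of the range, so placement below is
                       reported per page. */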
| if(localProc >= 0) |
| __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); |
| else |
| __kmp_printf_no_lock(" GTID %d\n", gtid); |
| # if KMP_USE_PRCTL |
| /* The more elaborate format is disabled for now because of the prctl hanging bug. */ |
| do { |
| last = p1; |
| lastNode = node; |
| /* This loop collates adjacent pages with the same host node. */ |
| do { |
                        p1 = (char*)p1 + PAGE_SIZE;
| } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); |
| __kmp_printf_no_lock(" %p-%p memNode %d\n", last, |
| (char*)p1 - 1, lastNode); |
| } while(p1 <= p2); |
| # else |
| __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, |
| (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1)); |
| if(p1 < p2) { |
| __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, |
| (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2)); |
| } |
| # endif |
| } |
| } |
| } else |
| __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) ); |
| } |
| #endif /* KMP_PRINT_DATA_PLACEMENT */ |
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
    va_end( ap );
}
| |
| void |
| __kmp_warn( char const * format, ... ) |
| { |
| char buffer[MAX_MESSAGE]; |
| va_list ap; |
| |
| if ( __kmp_generate_warnings == kmp_warnings_off ) { |
| return; |
| } |
| |
| va_start( ap, format ); |
| |
| snprintf( buffer, sizeof(buffer) , "OMP warning: %s\n", format ); |
| __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); |
| __kmp_vprintf( kmp_err, buffer, ap ); |
| __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); |
| |
| va_end( ap ); |
| } |
| |
| void |
| __kmp_abort_process() |
| { |
| |
| // Later threads may stall here, but that's ok because abort() will kill them. |
| __kmp_acquire_bootstrap_lock( & __kmp_exit_lock ); |
| |
| if ( __kmp_debug_buf ) { |
| __kmp_dump_debug_buffer(); |
| }; // if |
| |
| if ( KMP_OS_WINDOWS ) { |
| // Let other threads know of abnormal termination and prevent deadlock |
| // if abort happened during library initialization or shutdown |
| __kmp_global.g.g_abort = SIGABRT; |
| |
| /* |
            On Windows* OS, abort() causes a pop-up error box by default, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL,
            but it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility)
            does not help, at least in some versions of the MS C RTL.

            The following sequence seems to be the only way to simulate abort() and avoid the pop-up
            error box.
| */ |
| raise( SIGABRT ); |
| _exit( 3 ); // Just in case, if signal ignored, exit anyway. |
| } else { |
| abort(); |
| }; // if |
| |
| __kmp_infinite_loop(); |
| __kmp_release_bootstrap_lock( & __kmp_exit_lock ); |
| |
| } // __kmp_abort_process |
| |
| void |
| __kmp_abort_thread( void ) |
| { |
| // TODO: Eliminate g_abort global variable and this function. |
| // In case of abort just call abort(), it will kill all the threads. |
| __kmp_infinite_loop(); |
| } // __kmp_abort_thread |
| |
| /* ------------------------------------------------------------------------ */ |
| |
| /* |
| * Print out the storage map for the major kmp_info_t thread data structures |
| * that are allocated together. |
| */ |
| |
| static void |
| __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid ) |
| { |
| __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid ); |
| |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t), |
| "th_%d.th_info", gtid ); |
| |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t), |
| "th_%d.th_local", gtid ); |
| |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], |
| sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid ); |
| |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], |
| &thr->th.th_bar[bs_plain_barrier+1], |
| sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid); |
| |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], |
| &thr->th.th_bar[bs_forkjoin_barrier+1], |
| sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid); |
| |
| #if KMP_FAST_REDUCTION_BARRIER |
| __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], |
| &thr->th.th_bar[bs_reduction_barrier+1], |
| sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid); |
| #endif // KMP_FAST_REDUCTION_BARRIER |
| } |
| |
| /* |
| * Print out the storage map for the major kmp_team_t team data structures |
| * that are allocated together. |
| */ |
| |
| static void |
| __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr ) |
| { |
| int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2; |
| __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", |
| header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier], |
| sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id ); |
| |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1], |
| sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1], |
| sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id ); |
| |
| #if KMP_FAST_REDUCTION_BARRIER |
| __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1], |
| sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id ); |
| #endif // KMP_FAST_REDUCTION_BARRIER |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], |
| sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], |
| sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff], |
| sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", |
| header, team_id ); |
| |
| /* |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr], |
| sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id ); |
| |
| //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr], |
| // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id ); |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr], |
| sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id ); |
| #if OMP_40_ENABLED |
| __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr], |
| sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id ); |
| #endif |
| */ |
| |
| __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data, |
| sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id ); |
| } |
| |
| static void __kmp_init_allocator() {} |
| static void __kmp_fini_allocator() {} |
| static void __kmp_fini_allocator_thread() {} |
| |
| /* ------------------------------------------------------------------------ */ |
| |
| #ifdef GUIDEDLL_EXPORTS |
| # if KMP_OS_WINDOWS |
| |
| |
| static void |
| __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) { |
| // TODO: Change to __kmp_break_bootstrap_lock(). |
| __kmp_init_bootstrap_lock( lck ); // make the lock released |
| } |
| |
| static void |
| __kmp_reset_locks_on_process_detach( int gtid_req ) { |
| int i; |
| int thread_count; |
| |
    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // The OS terminates the other threads (except the one calling ProcessExit or FreeLibrary),
    // so it might seem safe to access __kmp_threads[] without taking the forkjoin_lock.
    // However, some threads may in fact still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are the most suspicious.
    // So it may actually not be safe to access __kmp_threads[].
| |
| // TODO: does it make sense to check __kmp_roots[] ? |
| |
| // Let's check that there are no other alive threads registered with the OMP lib. |
| while( 1 ) { |
| thread_count = 0; |
| for( i = 0; i < __kmp_threads_capacity; ++i ) { |
| if( !__kmp_threads ) continue; |
| kmp_info_t* th = __kmp_threads[ i ]; |
| if( th == NULL ) continue; |
| int gtid = th->th.th_info.ds.ds_gtid; |
| if( gtid == gtid_req ) continue; |
| if( gtid < 0 ) continue; |
| DWORD exit_val; |
| int alive = __kmp_is_thread_alive( th, &exit_val ); |
| if( alive ) { |
| ++thread_count; |
| } |
| } |
| if( thread_count == 0 ) break; // success |
| } |
| |
| // Assume that I'm alone. |
| |
    // Now it is probably safe to check and reset the locks.
| // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. |
| __kmp_reset_lock( &__kmp_forkjoin_lock ); |
| #ifdef KMP_DEBUG |
| __kmp_reset_lock( &__kmp_stdio_lock ); |
| #endif // KMP_DEBUG |
| |
| |
| } |
| |
| BOOL WINAPI |
| DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { |
| //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); |
| |
| switch( fdwReason ) { |
| |
| case DLL_PROCESS_ATTACH: |
| KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" )); |
| |
| return TRUE; |
| |
| case DLL_PROCESS_DETACH: |
| KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n", |
| __kmp_gtid_get_specific() )); |
| |
| if( lpReserved != NULL ) |
| { |
| // lpReserved is used for telling the difference: |
| // lpReserved == NULL when FreeLibrary() was called, |
| // lpReserved != NULL when the process terminates. |
| // When FreeLibrary() is called, worker threads remain alive. |
| // So they will release the forkjoin lock by themselves. |
| // When the process terminates, worker threads disappear triggering |
| // the problem of unreleased forkjoin lock as described below. |
| |
| // A worker thread can take the forkjoin lock |
| // in __kmp_suspend_template()->__kmp_rml_decrease_load_before_sleep(). |
| // The problem comes up if that worker thread becomes dead |
| // before it releases the forkjoin lock. |
| // The forkjoin lock remains taken, while the thread |
| // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below |
| // will try to take the forkjoin lock and will always fail, |
| // so that the application will never finish [normally]. |
| // This scenario is possible if __kmpc_end() has not been executed. |
            // It looks like this is not a corner case, but rather covers common cases:
            // - the main function was compiled by an alternative compiler;
            // - the main function was compiled by icl but without /Qopenmp (application with plugins);
            // - the application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP;
            // - an alive foreign thread prevented __kmpc_end from doing cleanup.
| |
| // This is a hack to work around the problem. |
| // TODO: !!! to figure out something better. |
| __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() ); |
| } |
| |
| __kmp_internal_end_library( __kmp_gtid_get_specific() ); |
| |
| return TRUE; |
| |
| case DLL_THREAD_ATTACH: |
| KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" )); |
| |
| /* if we wanted to register new siblings all the time here call |
| * __kmp_get_gtid(); */ |
| return TRUE; |
| |
| case DLL_THREAD_DETACH: |
| KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n", |
| __kmp_gtid_get_specific() )); |
| |
| __kmp_internal_end_thread( __kmp_gtid_get_specific() ); |
| return TRUE; |
| } |
| |
| return TRUE; |
| } |
| |
| # endif /* KMP_OS_WINDOWS */ |
| #endif /* GUIDEDLL_EXPORTS */ |
| |
| |
| /* ------------------------------------------------------------------------ */ |
| |
| /* Change the library type to "status" and return the old type */ |
| /* called from within initialization routines where __kmp_initz_lock is held */ |
| int |
| __kmp_change_library( int status ) |
| { |
| int old_status; |
| |
| old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count) |
| |
| if (status) { |
| __kmp_yield_init |= 1; // throughput => turnaround (odd init count) |
| } |
| else { |
| __kmp_yield_init &= ~1; // turnaround => throughput (even init count) |
| } |
| |
| return old_status; // return previous setting of whether KMP_LIBRARY=throughput |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* __kmp_parallel_deo -- |
| * Wait until it's our turn. |
| */ |
| void |
| __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) |
| { |
| int gtid = *gtid_ref; |
| #ifdef BUILD_PARALLEL_ORDERED |
| kmp_team_t *team = __kmp_team_from_gtid( gtid ); |
| #endif /* BUILD_PARALLEL_ORDERED */ |
| |
| if( __kmp_env_consistency_check ) { |
| if( __kmp_threads[gtid]->th.th_root->r.r_active ) |
| __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); |
| } |
| #ifdef BUILD_PARALLEL_ORDERED |
| if( !team->t.t_serialized ) { |
| kmp_uint32 spins; |
| |
| KMP_MB(); |
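        /* Wait until the team's ordered token (t_ordered.dt.t_value) equals this
           thread's tid; __kmp_parallel_dxo passes the token to the next tid, so
           ordered sections execute in thread order. */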
| KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL); |
| KMP_MB(); |
| } |
| #endif /* BUILD_PARALLEL_ORDERED */ |
| } |
| |
| /* __kmp_parallel_dxo -- |
| * Signal the next task. |
| */ |
| |
| void |
| __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) |
| { |
| int gtid = *gtid_ref; |
| #ifdef BUILD_PARALLEL_ORDERED |
| int tid = __kmp_tid_from_gtid( gtid ); |
| kmp_team_t *team = __kmp_team_from_gtid( gtid ); |
| #endif /* BUILD_PARALLEL_ORDERED */ |
| |
| if( __kmp_env_consistency_check ) { |
| if( __kmp_threads[gtid]->th.th_root->r.r_active ) |
| __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref ); |
| } |
| #ifdef BUILD_PARALLEL_ORDERED |
| if ( ! team->t.t_serialized ) { |
| KMP_MB(); /* Flush all pending memory write invalidates. */ |
| |
| /* use the tid of the next thread in this team */ |
        /* TODO: replace with a general release procedure */
| team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc ); |
| |
| KMP_MB(); /* Flush all pending memory write invalidates. */ |
| } |
| #endif /* BUILD_PARALLEL_ORDERED */ |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* The BARRIER for a SINGLE process section is always explicit */ |
| |
| int |
| __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) |
| { |
| int status; |
| kmp_info_t *th; |
| kmp_team_t *team; |
| |
| if( ! TCR_4(__kmp_init_parallel) ) |
| __kmp_parallel_initialize(); |
| |
| th = __kmp_threads[ gtid ]; |
| team = th->th.th_team; |
| status = 0; |
| |
| th->th.th_ident = id_ref; |
| |
| if ( team->t.t_serialized ) { |
| status = 1; |
| } else { |
| kmp_int32 old_this = th->th.th_local.this_construct; |
| |
| ++th->th.th_local.this_construct; |
| /* try to set team count to thread count--success means thread got the |
| single block |
| */ |
| /* TODO: Should this be acquire or release? */ |
| status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, |
| th->th.th_local.this_construct); |
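        /* Exactly one thread per construct instance wins the compare-and-store;
           that thread executes the single block and the others skip it. */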
| } |
| |
| if( __kmp_env_consistency_check ) { |
| if (status && push_ws) { |
| __kmp_push_workshare( gtid, ct_psingle, id_ref ); |
| } else { |
| __kmp_check_workshare( gtid, ct_psingle, id_ref ); |
| } |
| } |
| #if USE_ITT_BUILD |
| if ( status ) { |
| __kmp_itt_single_start( gtid ); |
| } |
| if( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) { |
| __kmp_itt_metadata_single(); |
| } |
| |
| #endif /* USE_ITT_BUILD */ |
| return status; |
| } |
| |
| void |
| __kmp_exit_single( int gtid ) |
| { |
| #if USE_ITT_BUILD |
| __kmp_itt_single_end( gtid ); |
| #endif /* USE_ITT_BUILD */ |
| if( __kmp_env_consistency_check ) |
| __kmp_pop_workshare( gtid, ct_psingle, NULL ); |
| } |
| |
| |
| /* |
| * determine if we can go parallel or must use a serialized parallel region and |
| * how many threads we can use |
 * set_nthreads is the number of threads requested for the team
 * returns 1 if we should serialize or only use one thread,
| * otherwise the number of threads to use |
| * The forkjoin lock is held by the caller. |
| */ |
| static int |
| __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team, |
| int master_tid, int set_nthreads |
| #if OMP_40_ENABLED |
| , int enter_teams |
| #endif /* OMP_40_ENABLED */ |
| ) |
| { |
| int capacity; |
| int new_nthreads; |
| int use_rml_to_adjust_nth; |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| KMP_DEBUG_ASSERT( root && parent_team ); |
| |
| // |
| // Initial check to see if we should use a serialized team. |
| // |
| if ( set_nthreads == 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n", |
| __kmp_get_gtid(), set_nthreads )); |
| return 1; |
| } |
| if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel |
| #if OMP_40_ENABLED |
| && !enter_teams |
| #endif /* OMP_40_ENABLED */ |
| ) ) || ( __kmp_library == library_serial ) ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n", |
| __kmp_get_gtid(), set_nthreads )); |
| return 1; |
| } |
| |
| // |
| // If dyn-var is set, dynamically adjust the number of desired threads, |
| // according to the method specified by dynamic_mode. |
| // |
| new_nthreads = set_nthreads; |
| use_rml_to_adjust_nth = FALSE; |
| if ( ! get__dynamic_2( parent_team, master_tid ) ) { |
| ; |
| } |
| #ifdef USE_LOAD_BALANCE |
| else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { |
| new_nthreads = __kmp_load_balance_nproc( root, set_nthreads ); |
| if ( new_nthreads == 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n", |
| master_tid )); |
| return 1; |
| } |
| if ( new_nthreads < set_nthreads ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n", |
| master_tid, new_nthreads )); |
| } |
| } |
| #endif /* USE_LOAD_BALANCE */ |
| else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { |
| new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 |
| : root->r.r_hot_team->t.t_nproc); |
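        /* i.e. available processors minus the threads already registered, plus the
           threads this root already contributes and will reuse (1 if the root is
           active, otherwise the hot team's size). */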
| if ( new_nthreads <= 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n", |
| master_tid )); |
| return 1; |
| } |
| if ( new_nthreads < set_nthreads ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n", |
| master_tid, new_nthreads )); |
| } |
| else { |
| new_nthreads = set_nthreads; |
| } |
| } |
| else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { |
| if ( set_nthreads > 2 ) { |
| new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] ); |
| new_nthreads = ( new_nthreads % set_nthreads ) + 1; |
| if ( new_nthreads == 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n", |
| master_tid )); |
| return 1; |
| } |
| if ( new_nthreads < set_nthreads ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n", |
| master_tid, new_nthreads )); |
| } |
| } |
| } |
| else { |
| KMP_ASSERT( 0 ); |
| } |
| |
| // |
| // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. |
| // |
| if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : |
| root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) { |
| int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 : |
| root->r.r_hot_team->t.t_nproc ); |
| if ( tl_nthreads <= 0 ) { |
| tl_nthreads = 1; |
| } |
| |
| // |
| // If dyn-var is false, emit a 1-time warning. |
| // |
| if ( ! get__dynamic_2( parent_team, master_tid ) |
| && ( ! __kmp_reserve_warn ) ) { |
| __kmp_reserve_warn = 1; |
| __kmp_msg( |
| kmp_ms_warning, |
| KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ), |
| KMP_HNT( Unset_ALL_THREADS ), |
| __kmp_msg_null |
| ); |
| } |
| if ( tl_nthreads == 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n", |
| master_tid )); |
| return 1; |
| } |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n", |
| master_tid, tl_nthreads )); |
| new_nthreads = tl_nthreads; |
| } |
| |
| |
| // |
| // Check if the threads array is large enough, or needs expanding. |
| // |
| // See comment in __kmp_register_root() about the adjustment if |
| // __kmp_threads[0] == NULL. |
| // |
| capacity = __kmp_threads_capacity; |
| if ( TCR_PTR(__kmp_threads[0]) == NULL ) { |
| --capacity; |
| } |
| if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : |
| root->r.r_hot_team->t.t_nproc ) > capacity ) { |
| // |
| // Expand the threads array. |
| // |
| int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : |
| root->r.r_hot_team->t.t_nproc ) - capacity; |
| int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); |
| if ( slotsAdded < slotsRequired ) { |
| // |
| // The threads array was not expanded enough. |
| // |
| new_nthreads -= ( slotsRequired - slotsAdded ); |
| KMP_ASSERT( new_nthreads >= 1 ); |
| |
| // |
| // If dyn-var is false, emit a 1-time warning. |
| // |
| if ( ! get__dynamic_2( parent_team, master_tid ) |
| && ( ! __kmp_reserve_warn ) ) { |
| __kmp_reserve_warn = 1; |
| if ( __kmp_tp_cached ) { |
| __kmp_msg( |
| kmp_ms_warning, |
| KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), |
| KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), |
| KMP_HNT( PossibleSystemLimitOnThreads ), |
| __kmp_msg_null |
| ); |
| } |
| else { |
| __kmp_msg( |
| kmp_ms_warning, |
| KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), |
| KMP_HNT( SystemLimitOnThreads ), |
| __kmp_msg_null |
| ); |
| } |
| } |
| } |
| } |
| |
| if ( new_nthreads == 1 ) { |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n", |
| __kmp_get_gtid(), set_nthreads ) ); |
| return 1; |
| } |
| |
| KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n", |
| __kmp_get_gtid(), new_nthreads, set_nthreads )); |
| return new_nthreads; |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* allocate threads from the thread pool and assign them to the new team */ |
| /* we are assured that there are enough threads available, because we |
 * checked that earlier within the forkjoin critical section */
| |
| static void |
| __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, |
| kmp_info_t *master_th, int master_gtid ) |
| { |
| int i; |
| int use_hot_team; |
| |
| KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) ); |
| KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() ); |
| KMP_MB(); |
| |
| /* first, let's setup the master thread */ |
| master_th->th.th_info.ds.ds_tid = 0; |
| master_th->th.th_team = team; |
| master_th->th.th_team_nproc = team->t.t_nproc; |
| master_th->th.th_team_master = master_th; |
| master_th->th.th_team_serialized = FALSE; |
| master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ]; |
| |
| /* make sure we are not the optimized hot team */ |
| #if KMP_NESTED_HOT_TEAMS |
| use_hot_team = 0; |
| kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; |
| if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0 |
| int level = team->t.t_active_level - 1; // index in array of hot teams |
| if( master_th->th.th_teams_microtask ) { // are we inside the teams? |
| if( master_th->th.th_teams_size.nteams > 1 ) { |
| ++level; // level was not increased in teams construct for team_of_masters |
| } |
| if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && |
| master_th->th.th_teams_level == team->t.t_level ) { |
| ++level; // level was not increased in teams construct for team_of_workers before the parallel |
| } // team->t.t_level will be increased inside parallel |
| } |
| if( level < __kmp_hot_teams_max_level ) { |
| if( hot_teams[level].hot_team ) { |
| // hot team has already been allocated for given level |
| KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); |
| use_hot_team = 1; // the team is ready to use |
| } else { |
| use_hot_team = 0; // AC: threads are not allocated yet |
| hot_teams[level].hot_team = team; // remember new hot team |
| hot_teams[level].hot_team_nth = team->t.t_nproc; |
| } |
| } else { |
| use_hot_team = 0; |
| } |
| } |
| #else |
| use_hot_team = team == root->r.r_hot_team; |
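    // Without nested hot teams, the only hot team that can be reused is the root's hot team.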
| #endif |
| if ( !use_hot_team ) { |
| |
| /* install the master thread */ |
| team->t.t_threads[ 0 ] = master_th; |
| __kmp_initialize_info( master_th, team, 0, master_gtid ); |
| |
| /* now, install the worker threads */ |
| for ( i=1 ; i < team->t.t_nproc ; i++ ) { |
| |
| /* fork or reallocate a new thread and install it in team */ |
| kmp_info_t *thr = __kmp_allocate_thread( root, team, i ); |
| team->t.t_threads[ i ] = thr; |
| KMP_DEBUG_ASSERT( thr ); |
| KMP_DEBUG_ASSERT( thr->th.th_team == team ); |
| /* align team and thread arrived states */ |
| KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n", |
| __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0, |
| __kmp_gtid_from_tid( i, team ), team->t.t_id, i, |
| team->t.t_bar[ bs_forkjoin_barrier ].b_arrived, |
| team->t.t_bar[ bs_plain_barrier ].b_arrived ) ); |
| #if OMP_40_ENABLED |
| thr->th.th_teams_microtask = master_th->th.th_teams_microtask; |
| thr->th.th_teams_level = master_th->th.th_teams_level; |
| thr->th.th_teams_size = master_th->th.th_teams_size; |
| #endif |
| { // Initialize threads' barrier data. |
| int b; |
| kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar; |
| for ( b = 0; b < bs_last_barrier; ++ b ) { |
| balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; |
| KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); |
| }; // for b |
| } |
| } |
| |
| #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED |
| __kmp_partition_places( team ); |
| #endif |
| |
| } |
| |
| KMP_MB(); |
| } |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| // |
| // Propagate any changes to the floating point control registers out to the team |
| // We try to avoid unnecessary writes to the relevant cache line in the team structure, |
| // so we don't make changes unless they are needed. |
| // |
| inline static void |
| propagateFPControl(kmp_team_t * team) |
| { |
| if ( __kmp_inherit_fp_control ) { |
| kmp_int16 x87_fpu_control_word; |
| kmp_uint32 mxcsr; |
| |
| // Get master values of FPU control flags (both X87 and vector) |
| __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); |
| __kmp_store_mxcsr( &mxcsr ); |
| mxcsr &= KMP_X86_MXCSR_MASK; |
| |
| // There is no point looking at t_fp_control_saved here. |
| // If it is TRUE, we still have to update the values if they are different from those we now have. |
| // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure |
| // that the values in the team are the same as those we have. |
| // So, this code achieves what we need whether or not t_fp_control_saved is true. |
| // By checking whether the value needs updating we avoid unnecessary writes that would put the |
| // cache-line into a written state, causing all threads in the team to have to read it again. |
| if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { |
| team->t.t_x87_fpu_control_word = x87_fpu_control_word; |
| } |
| if ( team->t.t_mxcsr != mxcsr ) { |
| team->t.t_mxcsr = mxcsr; |
| } |
| // Although we don't use this value, other code in the runtime wants to know whether it should restore them. |
| // So we must ensure it is correct. |
| if (!team->t.t_fp_control_saved) { |
| team->t.t_fp_control_saved = TRUE; |
| } |
| } |
| else { |
| // Similarly here. Don't write to this cache-line in the team structure unless we have to. |
| if (team->t.t_fp_control_saved) |
| team->t.t_fp_control_saved = FALSE; |
| } |
| } |
| |
| // Do the opposite, setting the hardware registers to the updated values from the team. |
| inline static void |
| updateHWFPControl(kmp_team_t * team) |
| { |
| if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) { |
| // |
        // Only reset the fp control regs if they have been changed in the team,
        // i.e. in the parallel region that we are exiting.
| // |
| kmp_int16 x87_fpu_control_word; |
| kmp_uint32 mxcsr; |
| __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); |
| __kmp_store_mxcsr( &mxcsr ); |
| mxcsr &= KMP_X86_MXCSR_MASK; |
| |
| if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { |
| __kmp_clear_x87_fpu_status_word(); |
| __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word ); |
| } |
| |
| if ( team->t.t_mxcsr != mxcsr ) { |
| __kmp_load_mxcsr( &team->t.t_mxcsr ); |
| } |
| } |
| } |
| #else |
| # define propagateFPControl(x) ((void)0) |
| # define updateHWFPControl(x) ((void)0) |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| static void |
| __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration |
| |
| /* |
 * Run a parallel region that has been serialized, so it runs only in a team consisting of the single master thread.
| */ |
| void |
| __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) |
| { |
| kmp_info_t *this_thr; |
| kmp_team_t *serial_team; |
| |
| KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) ); |
| |
| /* Skip all this code for autopar serialized loops since it results in |
| unacceptable overhead */ |
| if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) |
| return; |
| |
| if( ! TCR_4( __kmp_init_parallel ) ) |
| __kmp_parallel_initialize(); |
| |
| this_thr = __kmp_threads[ global_tid ]; |
| serial_team = this_thr->th.th_serial_team; |
| |
| /* utilize the serialized team held by this thread */ |
| KMP_DEBUG_ASSERT( serial_team ); |
| KMP_MB(); |
| |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| KMP_DEBUG_ASSERT( this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team ); |
| KMP_DEBUG_ASSERT( serial_team->t.t_task_team == NULL ); |
| KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", |
| global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); |
| this_thr->th.th_task_team = NULL; |
| } |
| |
| #if OMP_40_ENABLED |
| kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; |
| if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { |
| proc_bind = proc_bind_false; |
| } |
| else if ( proc_bind == proc_bind_default ) { |
| // |
| // No proc_bind clause was specified, so use the current value |
| // of proc-bind-var for this parallel region. |
| // |
| proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; |
| } |
| // |
| // Reset for next parallel region |
| // |
| this_thr->th.th_set_proc_bind = proc_bind_default; |
| #endif /* OMP_40_ENABLED */ |
| |
| if( this_thr->th.th_team != serial_team ) { |
| // Nested level will be an index in the nested nthreads array |
| int level = this_thr->th.th_team->t.t_level; |
| |
| if( serial_team->t.t_serialized ) { |
| /* this serial team was already used |
             * TODO: increase performance by making these locks more specific */
| kmp_team_t *new_team; |
| int tid = this_thr->th.th_info.ds.ds_tid; |
| |
| __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, |
| #if OMP_40_ENABLED |
| proc_bind, |
| #endif |
| & this_thr->th.th_current_task->td_icvs, |
| 0 USE_NESTED_HOT_ARG(NULL) ); |
| __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); |
| KMP_ASSERT( new_team ); |
| |
| /* setup new serialized team and install it */ |
| new_team->t.t_threads[0] = this_thr; |
| new_team->t.t_parent = this_thr->th.th_team; |
| serial_team = new_team; |
| this_thr->th.th_serial_team = serial_team; |
| |
| KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", |
| global_tid, serial_team ) ); |
| |
| |
| /* TODO the above breaks the requirement that if we run out of |
| * resources, then we can still guarantee that serialized teams |
| * are ok, since we may need to allocate a new one */ |
| } else { |
| KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", |
| global_tid, serial_team ) ); |
| } |
| |
| /* we have to initialize this serial team */ |
| KMP_DEBUG_ASSERT( serial_team->t.t_threads ); |
| KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); |
| KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team ); |
| serial_team->t.t_ident = loc; |
| serial_team->t.t_serialized = 1; |
| serial_team->t.t_nproc = 1; |
| serial_team->t.t_parent = this_thr->th.th_team; |
| serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; |
| this_thr->th.th_team = serial_team; |
| serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; |
| |
| KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n", |
| global_tid, this_thr->th.th_current_task ) ); |
| KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 ); |
| this_thr->th.th_current_task->td_flags.executing = 0; |
| |
| __kmp_push_current_task_to_thread( this_thr, serial_team, 0 ); |
| |
| /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for |
| each serialized task represented by team->t.t_serialized? */ |
| copy_icvs( |
| & this_thr->th.th_current_task->td_icvs, |
| & this_thr->th.th_current_task->td_parent->td_icvs ); |
| |
| // Thread value exists in the nested nthreads array for the next nested level |
| if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { |
| this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; |
| } |
| |
| #if OMP_40_ENABLED |
| if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) { |
| this_thr->th.th_current_task->td_icvs.proc_bind |
| = __kmp_nested_proc_bind.bind_types[ level + 1 ]; |
| } |
| #endif /* OMP_40_ENABLED */ |
| |
| this_thr->th.th_info.ds.ds_tid = 0; |
| |
| /* set thread cache values */ |
| this_thr->th.th_team_nproc = 1; |
| this_thr->th.th_team_master = this_thr; |
| this_thr->th.th_team_serialized = 1; |
| |
| serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; |
| serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; |
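        // Note: a serialized region adds a nesting level but not an active level.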
| |
| propagateFPControl (serial_team); |
| |
| /* check if we need to allocate dispatch buffers stack */ |
| KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); |
| if ( !serial_team->t.t_dispatch->th_disp_buffer ) { |
| serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *) |
| __kmp_allocate( sizeof( dispatch_private_info_t ) ); |
| } |
| this_thr->th.th_dispatch = serial_team->t.t_dispatch; |
| |
| KMP_MB(); |
| |
| } else { |
| /* this serialized team is already being used, |
| * that's fine, just add another nested level */ |
| KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team ); |
| KMP_DEBUG_ASSERT( serial_team->t.t_threads ); |
| KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); |
| ++ serial_team->t.t_serialized; |
| this_thr->th.th_team_serialized = serial_team->t.t_serialized; |
| |
| // Nested level will be an index in the nested nthreads array |
| int level = this_thr->th.th_team->t.t_level; |
| // Thread value exists in the nested nthreads array for the next nested level |
| if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { |
| this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; |
| } |
| serial_team->t.t_level++; |
| KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n", |
| global_tid, serial_team, serial_team->t.t_level ) ); |
| |
| /* allocate/push dispatch buffers stack */ |
| KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); |
| { |
| dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *) |
| __kmp_allocate( sizeof( dispatch_private_info_t ) ); |
| disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; |
| serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; |
| } |
| this_thr->th.th_dispatch = serial_team->t.t_dispatch; |
| |
| KMP_MB(); |
| } |
| |
| if ( __kmp_env_consistency_check ) |
| __kmp_push_parallel( global_tid, NULL ); |
| |
| #if USE_ITT_BUILD |
    // Mark the start of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
| if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) |
| { |
| this_thr->th.th_ident = loc; |
| // 0 - no barriers; 1 - serialized parallel |
| __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 ); |
| } |
    // Save the start of the "parallel" region for VTune. This is also the beginning of the join barrier.
| if( ( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && |
| __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) || KMP_ITT_DEBUG ) |
| { |
| this_thr->th.th_ident = loc; |
| #if USE_ITT_NOTIFY |
| if( this_thr->th.th_team->t.t_level == 1 ) { |
| serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); |
| } |
| #endif |
| } |
| #endif /* USE_ITT_BUILD */ |
| } |
| |
| /* most of the work for a fork */ |
| /* return true if we really went parallel, false if serialized */ |
| int |
| __kmp_fork_call( |
| ident_t * loc, |
| int gtid, |
| enum fork_context_e call_context, // Intel, GNU, ... |
| kmp_int32 argc, |
| microtask_t microtask, |
| launch_t invoker, |
| /* TODO: revert workaround for Intel(R) 64 tracker #96 */ |
| #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX |
| va_list * ap |
| #else |
| va_list ap |
| #endif |
| ) |
| { |
| void **argv; |
| int i; |
| int master_tid; |
| int master_this_cons; |
| kmp_team_t *team; |
| kmp_team_t *parent_team; |
| kmp_info_t *master_th; |
| kmp_root_t *root; |
| int nthreads; |
| int master_active; |
| int master_set_numthreads; |
| int level; |
| #if OMP_40_ENABLED |
| int active_level; |
| int teams_level; |
| #endif |
| #if KMP_NESTED_HOT_TEAMS |
| kmp_hot_team_ptr_t **p_hot_teams; |
| #endif |
| { // KMP_TIME_BLOCK |
| KMP_TIME_BLOCK(KMP_fork_call); |
| |
| KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); |
| if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) { |
| /* Some systems prefer the stack for the root thread(s) to start with */ |
| /* some gap from the parent stack to prevent false sharing. */ |
| void *dummy = alloca(__kmp_stkpadding); |
| /* These 2 lines below are so this does not get optimized out */ |
| if ( __kmp_stkpadding > KMP_MAX_STKPADDING ) |
| __kmp_stkpadding += (short)((kmp_int64)dummy); |
| } |
| |
| /* initialize if needed */ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown |
| if( ! TCR_4(__kmp_init_parallel) ) |
| __kmp_parallel_initialize(); |
| |
| /* setup current data */ |
| master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown |
| parent_team = master_th->th.th_team; |
| master_tid = master_th->th.th_info.ds.ds_tid; |
| master_this_cons = master_th->th.th_local.this_construct; |
| root = master_th->th.th_root; |
| master_active = root->r.r_active; |
| master_set_numthreads = master_th->th.th_set_nproc; |
| // Nested level will be an index in the nested nthreads array |
| level = parent_team->t.t_level; |
| #if OMP_40_ENABLED |
| active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed |
| teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams |
| #endif |
| #if KMP_NESTED_HOT_TEAMS |
| p_hot_teams = &master_th->th.th_hot_teams; |
| if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) { |
| *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate( |
| sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); |
| (*p_hot_teams)[0].hot_team = root->r.r_hot_team; |
| (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0) |
| } |
| #endif |
| |
| |
| master_th->th.th_ident = loc; |
| |
| #if OMP_40_ENABLED |
| if ( master_th->th.th_teams_microtask && |
| ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) { |
| // AC: This is start of parallel that is nested inside teams construct. |
| // The team is actual (hot), all workers are ready at the fork barrier. |
| // No lock needed to initialize the team a bit, then free workers. |
| parent_team->t.t_ident = loc; |
| parent_team->t.t_argc = argc; |
| argv = (void**)parent_team->t.t_argv; |
| for( i=argc-1; i >= 0; --i ) |
| /* TODO: revert workaround for Intel(R) 64 tracker #96 */ |
| #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX |
| *argv++ = va_arg( *ap, void * ); |
| #else |
| *argv++ = va_arg( ap, void * ); |
| #endif |
        /* Increment our nested depth level, but do not increase the serialization */
| if ( parent_team == master_th->th.th_serial_team ) { |
| // AC: we are in serialized parallel |
| __kmpc_serialized_parallel(loc, gtid); |
| KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 ); |
            parent_team->t.t_serialized--; // AC: need this so that enquiry functions
                                           // work correctly; will restore at join time
| KMP_TIME_BLOCK(OMP_work); |
| __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv ); |
| return TRUE; |
| } |
| parent_team->t.t_pkfn = microtask; |
| parent_team->t.t_invoke = invoker; |
| KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); |
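        // Atomically increment the root's count of active parallel regions
        // (see the r_in_parallel check in __kmp_reserve_threads).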
| parent_team->t.t_active_level ++; |
| parent_team->t.t_level ++; |
| |
| /* Change number of threads in the team if requested */ |
| if ( master_set_numthreads ) { // The parallel has num_threads clause |
| if ( master_set_numthreads < master_th->th.th_teams_size.nth ) { |
                // AC: we can only reduce the number of threads dynamically; we cannot increase it
| kmp_info_t **other_threads = parent_team->t.t_threads; |
| parent_team->t.t_nproc = master_set_numthreads; |
| for ( i = 0; i < master_set_numthreads; ++i ) { |
| other_threads[i]->th.th_team_nproc = master_set_numthreads; |
| } |
| // Keep extra threads hot in the team for possible next parallels |
| } |
| master_th->th.th_set_nproc = 0; |
| } |
| |
| |
| KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); |
| __kmp_internal_fork( loc, gtid, parent_team ); |
| KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); |
| |
| /* Invoke microtask for MASTER thread */ |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", |
| gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); |
| |
| { |
| KMP_TIME_BLOCK(OMP_work); |
| if (! parent_team->t.t_invoke( gtid )) { |
| KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); |
| } |
| } |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", |
| gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); |
| KMP_MB(); /* Flush all pending memory write invalidates. */ |
| |
| KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); |
| |
| return TRUE; |
| } // Parallel closely nested in teams construct |
| #endif /* OMP_40_ENABLED */ |
| |
| #if KMP_DEBUG |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team ); |
| } |
| #endif |
| |
| /* determine how many new threads we can use */ |
| __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { |
| nthreads = 1; |
| } else { |
| nthreads = master_set_numthreads ? |
| master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task |
| nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads |
| #if OMP_40_ENABLED |
| /* AC: If we execute teams from a parallel region (on host), then the teams should be created, |
| but each can have only 1 thread if nesting is disabled. If teams is called from a serial region, |
| then the teams and their threads should be created regardless of the nesting setting. */ |
| , ((ap==NULL && active_level==0) || |
| (ap && teams_level>0 && teams_level==level)) |
| #endif /* OMP_40_ENABLED */ |
| ); |
| } |
| KMP_DEBUG_ASSERT( nthreads > 0 ); |
| |
| /* If we temporarily changed the set number of threads then restore it now */ |
| master_th->th.th_set_nproc = 0; |
| |
| |
| /* create a serialized parallel region? */ |
| if ( nthreads == 1 ) { |
| /* josh todo: hypothetical question: what do we do for OS X*? */ |
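| /* A private copy of the microtask arguments is needed on this serialized path. A C99 |
| variable-length array is used where the toolchain is known to support it; otherwise |
| the space is obtained from the stack with alloca(). */ |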
| #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) |
| void * args[ argc ]; |
| #else |
| void * * args = (void**) alloca( argc * sizeof( void * ) ); |
| #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ |
| |
| __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); |
| |
| __kmpc_serialized_parallel(loc, gtid); |
| |
| if ( call_context == fork_context_intel ) { |
| /* TODO this sucks, use the compiler itself to pass args! :) */ |
| master_th->th.th_serial_team->t.t_ident = loc; |
| #if OMP_40_ENABLED |
| if ( !ap ) { |
| // revert change made in __kmpc_serialized_parallel() |
| master_th->th.th_serial_team->t.t_level--; |
| // Get args from parent team for teams construct |
| { |
| KMP_TIME_BLOCK(OMP_work); |
| __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv ); |
| } |
| } else if ( microtask == (microtask_t)__kmp_teams_master ) { |
| KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); |
| team = master_th->th.th_team; |
| //team->t.t_pkfn = microtask; |
| team->t.t_invoke = invoker; |
| __kmp_alloc_argv_entries( argc, team, TRUE ); |
| team->t.t_argc = argc; |
| argv = (void**) team->t.t_argv; |
| if ( ap ) { |
| for( i=argc-1; i >= 0; --i ) |
| // TODO: revert workaround for Intel(R) 64 tracker #96 |
| # if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX |
| *argv++ = va_arg( *ap, void * ); |
| # else |
| *argv++ = va_arg( ap, void * ); |
| # endif |
| } else { |
| for( i=0; i < argc; ++i ) |
| // Get args from parent team for teams construct |
| argv[i] = parent_team->t.t_argv[i]; |
| } |
| // AC: revert change made in __kmpc_serialized_parallel() |
| // because initial code in teams should have level=0 |
| team->t.t_level--; |
| // AC: call special invoker for outer "parallel" of the teams construct |
| { |
| KMP_TIME_BLOCK(OMP_work); |
| invoker(gtid); |
| } |
| } else { |
| #endif /* OMP_40_ENABLED */ |
| argv = args; |
| for( i=argc-1; i >= 0; --i ) |
| // TODO: revert workaround for Intel(R) 64 tracker #96 |
| #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX |
| *argv++ = va_arg( *ap, void * ); |
| #else |
| *argv++ = va_arg( ap, void * ); |
| #endif |
| KMP_MB(); |
| { |
| KMP_TIME_BLOCK(OMP_work); |
| __kmp_invoke_microtask( microtask, gtid, 0, argc, args ); |
| } |
| #if OMP_40_ENABLED |
| } |
| #endif /* OMP_40_ENABLED */ |
| } |
| else if ( call_context == fork_context_gnu ) { |
| // we were called from GNU native code |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); |
| return FALSE; |
| } |
| else { |
| KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); |
| } |
| |
| |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); |
| KMP_MB(); |
| return FALSE; |
| } |
| |
| // GEH: only modify the executing flag when not serialized; |
| // the serialized case is handled in kmpc_serialized_parallel |
| KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n", |
| parent_team->t.t_active_level, master_th, master_th->th.th_current_task, |
| master_th->th.th_current_task->td_icvs.max_active_levels ) ); |
| // TODO: GEH - cannot do this assertion because root thread not set up as executing |
| // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); |
| master_th->th.th_current_task->td_flags.executing = 0; |
| |
| #if OMP_40_ENABLED |
| if ( !master_th->th.th_teams_microtask || level > teams_level ) |
| #endif /* OMP_40_ENABLED */ |
| { |
| /* Increment our nested depth level */ |
| KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); |
| } |
| |
| // See if we need to make a copy of the ICVs. |
| int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; |
| if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { |
| nthreads_icv = __kmp_nested_nth.nth[level+1]; |
| } |
| else { |
| nthreads_icv = 0; // don't update |
| } |
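| // Illustrative example (assuming OMP_NUM_THREADS was given as a list): with |
| // OMP_NUM_THREADS=8,4 the outermost fork picks nthreads_icv = 4 here, so the new |
| // team's threads inherit an nproc ICV of 4 for any parallel region nested inside them. |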
| |
| #if OMP_40_ENABLED |
| // Figure out the proc_bind_policy for the new team. |
| kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; |
| kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update |
| if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { |
| proc_bind = proc_bind_false; |
| } |
| else { |
| if (proc_bind == proc_bind_default) { |
| // No proc_bind clause specified; use current proc-bind-var for this parallel region |
| proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; |
| } |
| /* else: The proc_bind policy was specified explicitly on parallel clause. This |
| overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ |
| // Figure the value of proc-bind-var for the child threads. |
| if ((level+1 < __kmp_nested_proc_bind.used) |
| && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { |
| proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; |
| } |
| } |
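| // Illustrative example: with OMP_PROC_BIND=spread,close the outermost fork binds |
| // this region with proc_bind_spread, while proc_bind_icv becomes proc_bind_close |
| // and is propagated to the child threads as their proc-bind-var. |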
| |
| // Reset for next parallel region |
| master_th->th.th_set_proc_bind = proc_bind_default; |
| #endif /* OMP_40_ENABLED */ |
| |
| if ((nthreads_icv > 0) |
| #if OMP_40_ENABLED |
| || (proc_bind_icv != proc_bind_default) |
| #endif /* OMP_40_ENABLED */ |
| ) { |
| kmp_internal_control_t new_icvs; |
| copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); |
| new_icvs.next = NULL; |
| if (nthreads_icv > 0) { |
| new_icvs.nproc = nthreads_icv; |
| } |
| |
| #if OMP_40_ENABLED |
| if (proc_bind_icv != proc_bind_default) { |
| new_icvs.proc_bind = proc_bind_icv; |
| } |
| #endif /* OMP_40_ENABLED */ |
| |
| /* allocate a new parallel team */ |
| KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); |
| team = __kmp_allocate_team(root, nthreads, nthreads, |
| #if OMP_40_ENABLED |
| proc_bind, |
| #endif |
| &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); |
| } else { |
| /* allocate a new parallel team */ |
| KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); |
| team = __kmp_allocate_team(root, nthreads, nthreads, |
| #if OMP_40_ENABLED |
| proc_bind, |
| #endif |
| &master_th->th.th_current_task->td_icvs, argc |
| USE_NESTED_HOT_ARG(master_th) ); |
| } |
| KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); |
| |
| /* setup the new team */ |
| team->t.t_master_tid = master_tid; |
| team->t.t_master_this_cons = master_this_cons; |
| team->t.t_ident = loc; |
| team->t.t_parent = parent_team; |
| TCW_SYNC_PTR(team->t.t_pkfn, microtask); |
| team->t.t_invoke = invoker; /* TODO move this to root, maybe */ |
| // TODO: parent_team->t.t_level == INT_MAX ??? |
| #if OMP_40_ENABLED |
| if ( !master_th->th.th_teams_microtask || level > teams_level ) { |
| #endif /* OMP_40_ENABLED */ |
| team->t.t_level = parent_team->t.t_level + 1; |
| team->t.t_active_level = parent_team->t.t_active_level + 1; |
| #if OMP_40_ENABLED |
| } else { |
| // AC: Do not increase parallel level at start of the teams construct |
| team->t.t_level = parent_team->t.t_level; |
| team->t.t_active_level = parent_team->t.t_active_level; |
| } |
| #endif /* OMP_40_ENABLED */ |
| team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule |
| |
| // Update the floating point rounding in the team if required. |
| propagateFPControl(team); |
| |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| // Set master's task team to the team's task team. Unless this is the hot team, it should be NULL. |
| KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team ); |
| KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", |
| __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, |
| parent_team, team->t.t_task_team, team ) ); |
| master_th->th.th_task_team = team->t.t_task_team; |
| #if !KMP_NESTED_HOT_TEAMS |
| KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); |
| #endif |
| } |
| |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", |
| gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); |
| KMP_DEBUG_ASSERT( team != root->r.r_hot_team || |
| ( team->t.t_master_tid == 0 && |
| ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); |
| KMP_MB(); |
| |
| /* now, setup the arguments */ |
| argv = (void**)team->t.t_argv; |
| #if OMP_40_ENABLED |
| if ( ap ) { |
| #endif /* OMP_40_ENABLED */ |
| for ( i=argc-1; i >= 0; --i ) |
| // TODO: revert workaround for Intel(R) 64 tracker #96 |
| #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX |
| *argv++ = va_arg( *ap, void * ); |
| #else |
| *argv++ = va_arg( ap, void * ); |
| #endif |
| #if OMP_40_ENABLED |
| } else { |
| for ( i=0; i < argc; ++i ) |
| // Get args from parent team for teams construct |
| argv[i] = team->t.t_parent->t.t_argv[i]; |
| } |
| #endif /* OMP_40_ENABLED */ |
| |
| /* now actually fork the threads */ |
| team->t.t_master_active = master_active; |
| if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong |
| root->r.r_active = TRUE; |
| |
| __kmp_fork_team_threads( root, team, master_th, gtid ); |
| __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); |
| |
| |
| __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| |
| #if USE_ITT_BUILD |
| // Mark start of the "parallel" region for VTune. Only one frame notification scheme is used at the moment. |
| if ((__itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) || KMP_ITT_DEBUG) |
| # if OMP_40_ENABLED |
| if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) |
| // Either not in teams or the outer fork of the teams construct |
| # endif /* OMP_40_ENABLED */ |
| { |
| __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); |
| } |
| kmp_uint64 tmp_time = 0; |
| #if USE_ITT_NOTIFY |
| if ( __itt_get_timestamp_ptr ) |
| tmp_time = __itt_get_timestamp(); |
| #endif |
| if ((__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode==3)|| KMP_ITT_DEBUG) |
| # if OMP_40_ENABLED |
| if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master) |
| // Either not in teams or the outer fork of the teams construct |
| # endif /* OMP_40_ENABLED */ |
| team->t.t_region_time = tmp_time; |
| |
| // Internal fork - report frame begin |
| if ((__kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3) && __itt_frame_submit_v3_ptr ) { |
| if (!(team->t.t_active_level > 1)) { |
| master_th->th.th_frame_time = tmp_time; |
| } |
| } |
| #endif /* USE_ITT_BUILD */ |
| |
| /* now go on and do the work */ |
| KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); |
| KMP_MB(); |
| KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", |
| root, team, master_th, gtid)); |
| |
| #if USE_ITT_BUILD |
| if ( __itt_stack_caller_create_ptr ) { |
| team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier |
| } |
| #endif /* USE_ITT_BUILD */ |
| |
| #if OMP_40_ENABLED |
| if ( ap ) // AC: skip __kmp_internal_fork for the teams construct; let only the master threads execute |
| #endif /* OMP_40_ENABLED */ |
| { |
| __kmp_internal_fork( loc, gtid, team ); |
| KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", |
| root, team, master_th, gtid)); |
| } |
| |
| if (call_context == fork_context_gnu) { |
| KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); |
| return TRUE; |
| } |
| |
| /* Invoke microtask for MASTER thread */ |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", |
| gtid, team->t.t_id, team->t.t_pkfn ) ); |
| } // END of timer KMP_fork_call block |
| |
| { |
| //KMP_TIME_BLOCK(OMP_work); |
| KMP_TIME_BLOCK(USER_master_invoke); |
| if (! team->t.t_invoke( gtid )) { |
| KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); |
| } |
| } |
| KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", |
| gtid, team->t.t_id, team->t.t_pkfn ) ); |
| KMP_MB(); /* Flush all pending memory write invalidates. */ |
| |
| KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); |
| |
| return TRUE; |
| } |
| |
| void |
| __kmp_join_call(ident_t *loc, int gtid |
| #if OMP_40_ENABLED |
| , int exit_teams |
| #endif /* OMP_40_ENABLED */ |
| ) |
| { |
| KMP_TIME_BLOCK(KMP_join_call); |
| kmp_team_t *team; |
| kmp_team_t *parent_team; |
| kmp_info_t *master_th; |
| kmp_root_t *root; |
| int master_active; |
| int i; |
| |
| KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); |
| |
| /* setup current data */ |
| master_th = __kmp_threads[ gtid ]; |
| root = master_th->th.th_root; |
| team = master_th->th.th_team; |
| parent_team = team->t.t_parent; |
| |
| master_th->th.th_ident = loc; |
| |
| #if KMP_DEBUG |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", |
| __kmp_gtid_from_thread( master_th ), team, |
| team->t.t_task_team, master_th->th.th_task_team) ); |
| KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team ); |
| } |
| #endif |
| |
| if( team->t.t_serialized ) { |
| #if OMP_40_ENABLED |
| if ( master_th->th.th_teams_microtask ) { |
| // We are in teams construct |
| int level = team->t.t_level; |
| int tlevel = master_th->th.th_teams_level; |
| if ( level == tlevel ) { |
| // AC: we haven't incremented it earlier at start of teams construct, |
| // so do it here - at the end of teams construct |
| team->t.t_level++; |
| } else if ( level == tlevel + 1 ) { |
| // AC: we are exiting parallel inside teams, need to increment serialization |
| // in order to restore it in the next call to __kmpc_end_serialized_parallel |
| team->t.t_serialized++; |
| } |
| } |
| #endif /* OMP_40_ENABLED */ |
| __kmpc_end_serialized_parallel( loc, gtid ); |
| return; |
| } |
| |
| master_active = team->t.t_master_active; |
| |
| #if OMP_40_ENABLED |
| if (!exit_teams) |
| #endif /* OMP_40_ENABLED */ |
| { |
| // AC: No barrier for internal teams at exit from the teams construct, |
| // but there is a barrier for the external team (league). |
| __kmp_internal_join( loc, gtid, team ); |
| } |
| KMP_MB(); |
| |
| #if USE_ITT_BUILD |
| if ( __itt_stack_caller_create_ptr ) { |
| __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier |
| } |
| |
| // Mark end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment. |
| if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) |
| # if OMP_40_ENABLED |
| if ( !master_th->th.th_teams_microtask /* not in teams */ || |
| ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) ) |
| // Either not in teams or exiting teams region |
| // (teams is a frame and no other frames inside the teams) |
| # endif /* OMP_40_ENABLED */ |
| { |
| master_th->th.th_ident = loc; |
| __kmp_itt_region_joined( gtid ); |
| } |
| if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG ) |
| # if OMP_40_ENABLED |
| if ( !master_th->th.th_teams_microtask /* not in teams */ || |
| ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) ) |
| // Either not in teams or exiting teams region |
| // (teams is a frame and no other frames inside the teams) |
| # endif /* OMP_40_ENABLED */ |
| { |
| master_th->th.th_ident = loc; |
| __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 0, loc, master_th->th.th_team_nproc, 1 ); |
| } |
| #endif /* USE_ITT_BUILD */ |
| |
| #if OMP_40_ENABLED |
| if ( master_th->th.th_teams_microtask && |
| !exit_teams && |
| team->t.t_pkfn != (microtask_t)__kmp_teams_master && |
| team->t.t_level == master_th->th.th_teams_level + 1 ) { |
| // AC: We need to leave the team structure intact at the end of a parallel |
| // region inside the teams construct, so that the same (hot) team works at |
| // the next parallel region; only the nesting levels are adjusted. |
| |
| /* Decrement our nested depth level */ |
| team->t.t_level --; |
| team->t.t_active_level --; |
| KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); |
| |
| /* Restore number of threads in the team if needed */ |
| if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { |
| int old_num = master_th->th.th_team_nproc; |
| int new_num = master_th->th.th_teams_size.nth; |
| kmp_info_t **other_threads = team->t.t_threads; |
| team->t.t_nproc = new_num; |
| for ( i = 0; i < old_num; ++i ) { |
| other_threads[i]->th.th_team_nproc = new_num; |
| } |
| // Adjust the states of the previously unused threads of the team |
| for ( i = old_num; i < new_num; ++i ) { |
| // Re-initialize thread's barrier data. |
| int b; |
| kmp_balign_t * balign = other_threads[i]->th.th_bar; |
| for ( b = 0; b < bs_last_barrier; ++ b ) { |
| balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; |
| KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); |
| } |
| // Synchronize thread's task state |
| other_threads[i]->th.th_task_state = master_th->th.th_task_state; |
| } |
| } |
| return; |
| } |
| #endif /* OMP_40_ENABLED */ |
| |
| /* do cleanup and restore the parent team */ |
| master_th->th.th_info.ds.ds_tid = team->t.t_master_tid; |
| master_th->th.th_local.this_construct = team->t.t_master_this_cons; |
| |
| master_th->th.th_dispatch = |
| & parent_team->t.t_dispatch[ team->t.t_master_tid ]; |
| |
| /* jc: The following lock has instructions with REL and ACQ semantics, |
| separating the parallel user code called in this parallel region |
| from the serial user code called after this function returns. |
| */ |
| __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| #if OMP_40_ENABLED |
| if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) |
| #endif /* OMP_40_ENABLED */ |
| { |
| /* Decrement our nested depth level */ |
| KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); |
| } |
| KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); |
| |
| KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", |
| 0, master_th, team ) ); |
| __kmp_pop_current_task_from_thread( master_th ); |
| |
| #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED |
| // |
| // Restore master thread's partition. |
| // |
| master_th->th.th_first_place = team->t.t_first_place; |
| master_th->th.th_last_place = team->t.t_last_place; |
| #endif /* OMP_40_ENABLED */ |
| |
| updateHWFPControl (team); |
| |
| if ( root->r.r_active != master_active ) |
| root->r.r_active = master_active; |
| |
| __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads |
| |
| /* This race was fun to find. Make sure the following is inside the critical |
| * region; otherwise assertions may fail occasionally, since the old team |
| * may be reallocated and the hierarchy would appear inconsistent. It is |
| * actually safe to run and won't cause any bugs, but it will cause those |
| * assertion failures. It's only one deref&assign, so we might as well put it |
| * in the critical region. */ |
| master_th->th.th_team = parent_team; |
| master_th->th.th_team_nproc = parent_team->t.t_nproc; |
| master_th->th.th_team_master = parent_team->t.t_threads[0]; |
| master_th->th.th_team_serialized = parent_team->t.t_serialized; |
| |
| /* restore serialized team, if need be */ |
| if( parent_team->t.t_serialized && |
| parent_team != master_th->th.th_serial_team && |
| parent_team != root->r.r_root_team ) { |
| __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); |
| master_th->th.th_serial_team = parent_team; |
| } |
| |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| // |
| // Copy the task team from the new child / old parent team |
| // to the thread. If non-NULL, copy the state flag also. |
| // |
| if ( ( master_th->th.th_task_team = parent_team->t.t_task_team ) != NULL ) { |
| master_th->th.th_task_state = master_th->th.th_task_team->tt.tt_state; |
| } |
| KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", |
| __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, |
| parent_team ) ); |
| } |
| |
| // TODO: GEH - cannot do this assertion because root thread not set up as executing |
| // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); |
| master_th->th.th_current_task->td_flags.executing = 1; |
| |
| __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| KMP_MB(); |
| KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* Check whether we should push an internal control record onto the |
| serial team stack. If so, do it. */ |
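| /* Each pushed record snapshots the current ICVs at the present serialized nesting |
| level, so that ICV changes made inside a serialized parallel region (for example |
| via omp_set_num_threads()) can be restored when __kmpc_end_serialized_parallel |
| pops the record at the end of that region. */ |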
| void |
| __kmp_save_internal_controls ( kmp_info_t * thread ) |
| { |
| |
| if ( thread->th.th_team != thread->th.th_serial_team ) { |
| return; |
| } |
| if (thread->th.th_team->t.t_serialized > 1) { |
| int push = 0; |
| |
| if (thread->th.th_team->t.t_control_stack_top == NULL) { |
| push = 1; |
| } else { |
| if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != |
| thread->th.th_team->t.t_serialized ) { |
| push = 1; |
| } |
| } |
| if (push) { /* push a record on the serial team's stack */ |
| kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); |
| |
| copy_icvs( control, & thread->th.th_current_task->td_icvs ); |
| |
| control->serial_nesting_level = thread->th.th_team->t.t_serialized; |
| |
| control->next = thread->th.th_team->t.t_control_stack_top; |
| thread->th.th_team->t.t_control_stack_top = control; |
| } |
| } |
| } |
| |
| /* Changes set_nproc */ |
| void |
| __kmp_set_num_threads( int new_nth, int gtid ) |
| { |
| kmp_info_t *thread; |
| kmp_root_t *root; |
| |
| KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| if (new_nth < 1) |
| new_nth = 1; |
| else if (new_nth > __kmp_max_nth) |
| new_nth = __kmp_max_nth; |
| |
| thread = __kmp_threads[gtid]; |
| |
| __kmp_save_internal_controls( thread ); |
| |
| set__nproc( thread, new_nth ); |
| |
| // |
| // If this omp_set_num_threads() call will cause the hot team size to be |
| // reduced (in the absence of a num_threads clause), then reduce it now, |
| // rather than waiting for the next parallel region. |
| // |
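| // Illustrative user sequence (not part of the runtime) that takes the branch below: |
| //   omp_set_num_threads(2);   // called from serial code while the root is inactive |
| //   #pragma omp parallel      // the next region starts with the already-trimmed hot team |
| //   { /* ... */ } |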
| root = thread->th.th_root; |
| if ( __kmp_init_parallel && ( ! root->r.r_active ) |
| && ( root->r.r_hot_team->t.t_nproc > new_nth ) |
| #if KMP_NESTED_HOT_TEAMS |
| && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode |
| #endif |
| ) { |
| kmp_team_t *hot_team = root->r.r_hot_team; |
| int f; |
| |
| __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| |
| if ( __kmp_tasking_mode != tskm_immediate_exec ) { |
| kmp_task_team_t *task_team = hot_team->t.t_task_team; |
| if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { |
| // |
| // Signal the worker threads (esp. the extra ones) to stop |
| // looking for tasks while spin waiting. The task teams |
| // are reference counted and will be deallocated by the |
| // last worker thread. |
| // |
| KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 ); |
| TCW_SYNC_4( task_team->tt.tt_active, FALSE ); |
| KMP_MB(); |
| |
| KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n", |
| &hot_team->t.t_task_team ) ); |
| hot_team->t.t_task_team = NULL; |
| } |
| else { |
| KMP_DEBUG_ASSERT( task_team == NULL ); |
| } |
| } |
| |
| // |
| // Release the extra threads we don't need any more. |
| // |
| for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { |
| KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); |
| __kmp_free_thread( hot_team->t.t_threads[f] ); |
| hot_team->t.t_threads[f] = NULL; |
| } |
| hot_team->t.t_nproc = new_nth; |
| #if KMP_NESTED_HOT_TEAMS |
| if( thread->th.th_hot_teams ) { |
| KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); |
| thread->th.th_hot_teams[0].hot_team_nth = new_nth; |
| } |
| #endif |
| |
| |
| __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); |
| |
| // |
| // Update the t_nproc field in the threads that are still active. |
| // |
| for( f=0 ; f < new_nth; f++ ) { |
| KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); |
| hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; |
| } |
| // Special flag to mark that the size changed due to an omp_set_num_threads() call |
| hot_team->t.t_size_changed = -1; |
| } |
| |
| } |
| |
| /* Changes max_active_levels */ |
| void |
| __kmp_set_max_active_levels( int gtid, int max_active_levels ) |
| { |
| kmp_info_t *thread; |
| |
| KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| // validate max_active_levels |
| if( max_active_levels < 0 ) { |
| KMP_WARNING( ActiveLevelsNegative, max_active_levels ); |
| // We ignore this call if the user has specified a negative value. |
| // The current setting won't be changed. The last valid setting will be used. |
| // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). |
| KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); |
| return; |
| } |
| if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { |
| // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] |
| // We allow a zero value. (implementation defined behavior) |
| } else { |
| KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); |
| max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; |
| // Current upper limit is MAX_INT. (implementation defined behavior) |
| // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) |
| // Actually, the flow should never get here as long as the upper limit is MAX_INT. |
| } |
| KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); |
| |
| thread = __kmp_threads[ gtid ]; |
| |
| __kmp_save_internal_controls( thread ); |
| |
| set__max_active_levels( thread, max_active_levels ); |
| |
| } |
| |
| /* Gets max_active_levels */ |
| int |
| __kmp_get_max_active_levels( int gtid ) |
| { |
| kmp_info_t *thread; |
| |
| KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| thread = __kmp_threads[ gtid ]; |
| KMP_DEBUG_ASSERT( thread->th.th_current_task ); |
| KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", |
| gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); |
| return thread->th.th_current_task->td_icvs.max_active_levels; |
| } |
| |
| /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ |
| void |
| __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) |
| { |
| kmp_info_t *thread; |
| // kmp_team_t *team; |
| |
| KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| // Check if the kind parameter is valid, correct if needed. |
| // Valid parameters should fit in one of two intervals - standard or extended: |
| // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> |
| // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 |
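| // For example, given the ranges above a standard kind such as kmp_sched_guided is |
| // accepted as-is, while a value that falls between the standard and extended ranges |
| // (e.g. 5 or 100) triggers the warning below and falls back to the default |
| // "static, no chunk" schedule. |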
| if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || |
| ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) |
| { |
| // TODO: Hint needs attention in case we change the default schedule. |
| __kmp_msg( |
| kmp_ms_warning, |
| KMP_MSG( ScheduleKindOutOfRange, kind ), |
| KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), |
| __kmp_msg_null |
| ); |
| kind = kmp_sched_default; |
| chunk = 0; // ignore chunk value in case of bad kind |
| } |
| |
| thread = __kmp_threads[ gtid ]; |
| |
| __kmp_save_internal_controls( thread ); |
| |
| if ( kind < kmp_sched_upper_std ) { |
| if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { |
| // differentiate static chunked vs. unchunked: |
| // the chunk should be invalid to indicate an unchunked schedule (which is the default) |
| thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; |
| } else { |
| thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; |
| } |
| } else { |
| // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; |
| thread->th.th_current_task->td_icvs.sched.r_sched_type = |
| __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; |
| } |
| if ( kind == kmp_sched_auto ) { |
| // ignore parameter chunk for schedule auto |
| thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; |
| } else { |
| thread->th.th_current_task->td_icvs.sched.chunk = chunk; |
| } |
| } |
| |
| /* Gets def_sched_var ICV values */ |
| void |
| __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) |
| { |
| kmp_info_t *thread; |
| enum sched_type th_type; |
| int i; |
| |
| KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| thread = __kmp_threads[ gtid ]; |
| |
| //th_type = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type; |
| th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; |
| |
| switch ( th_type ) { |
| case kmp_sch_static: |
| case kmp_sch_static_greedy: |
| case kmp_sch_static_balanced: |
| *kind = kmp_sched_static; |
| *chunk = 0; // chunk was not set, try to show this fact via zero value |
| return; |
| case kmp_sch_static_chunked: |
| *kind = kmp_sched_static; |
| break; |
| case kmp_sch_dynamic_chunked: |
| *kind = kmp_sched_dynamic; |
| break; |
| case kmp_sch_guided_chunked: |
| case kmp_sch_guided_iterative_chunked: |
| case kmp_sch_guided_analytical_chunked: |
| *kind = kmp_sched_guided; |
| break; |
| case kmp_sch_auto: |
| *kind = kmp_sched_auto; |
| break; |
| case kmp_sch_trapezoidal: |
| *kind = kmp_sched_trapezoidal; |
| break; |
| /* |
| case kmp_sch_static_steal: |
| *kind = kmp_sched_static_steal; |
| break; |
| */ |
| default: |
| KMP_FATAL( UnknownSchedulingType, th_type ); |
| } |
| |
| //*chunk = thread->th.th_team->t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk; |
| *chunk = thread->th.th_current_task->td_icvs.sched.chunk; |
| } |
| |
| int |
| __kmp_get_ancestor_thread_num( int gtid, int level ) { |
| |
| int ii, dd; |
| kmp_team_t *team; |
| kmp_info_t *thr; |
| |
| KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| // validate level |
| if( level == 0 ) return 0; |
| if( level < 0 ) return -1; |
| thr = __kmp_threads[ gtid ]; |
| team = thr->th.th_team; |
| ii = team->t.t_level; |
| if( level > ii ) return -1; |
| |
| #if OMP_40_ENABLED |
| if( thr->th.th_teams_microtask ) { |
| // AC: we are in a teams region where multiple nested teams have the same level |
| int tlevel = thr->th.th_teams_level; // the level of the teams construct |
| if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) |
| KMP_DEBUG_ASSERT( ii >= tlevel ); |
| // AC: As we need to pass by the teams league, we need to artificially increase ii |
| if ( ii == tlevel ) { |
| ii += 2; // three teams have same level |
| } else { |
| ii ++; // two teams have same level |
| } |
| } |
| } |
| #endif |
| |
| if( ii == level ) return __kmp_tid_from_gtid( gtid ); |
| |
| dd = team->t.t_serialized; |
| level++; |
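| // Walk up the team hierarchy: each serialized nesting inside a team accounts for one |
| // level; once those are exhausted, move to the parent team, until the requested |
| // ancestor level is reached. |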
| while( ii > level ) |
| { |
| for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) |
| { |
| } |
| if( ( team->t.t_serialized ) && ( !dd ) ) { |
| team = team->t.t_parent; |
| continue; |
| } |
| if( ii > level ) { |
| team = team->t.t_parent; |
| dd = team->t.t_serialized; |
| ii--; |
| } |
| } |
| |
| return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); |
| } |
| |
| int |
| __kmp_get_team_size( int gtid, int level ) { |
| |
| int ii, dd; |
| kmp_team_t *team; |
| kmp_info_t *thr; |
| |
| KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| // validate level |
| if( level == 0 ) return 1; |
| if( level < 0 ) return -1; |
| thr = __kmp_threads[ gtid ]; |
| team = thr->th.th_team; |
| ii = team->t.t_level; |
| if( level > ii ) return -1; |
| |
| #if OMP_40_ENABLED |
| if( thr->th.th_teams_microtask ) { |
| // AC: we are in a teams region where multiple nested teams have the same level |
| int tlevel = thr->th.th_teams_level; // the level of the teams construct |
| if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) |
| KMP_DEBUG_ASSERT( ii >= tlevel ); |
| // AC: As we need to pass by the teams league, we need to artificially increase ii |
| if ( ii == tlevel ) { |
| ii += 2; // three teams have same level |
| } else { |
| ii ++; // two teams have same level |
| } |
| } |
| } |
| #endif |
| |
| while( ii > level ) |
| { |
| for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) |
| { |
| } |
| if( team->t.t_serialized && ( !dd ) ) { |
| team = team->t.t_parent; |
| continue; |
| } |
| if( ii > level ) { |
| team = team->t.t_parent; |
| ii--; |
| } |
| } |
| |
| return team->t.t_nproc; |
| } |
| |
| kmp_r_sched_t |
| __kmp_get_schedule_global() { |
| // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) |
| // may be changed by kmp_set_defaults independently, so the updated schedule can be obtained here. |
| |
| kmp_r_sched_t r_sched; |
| |
| // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided |
| // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, |
| // and thus have different run-time schedules in different roots (even in OMP 2.5) |
| if ( __kmp_sched == kmp_sch_static ) { |
| r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) |
| } else if ( __kmp_sched == kmp_sch_guided_chunked ) { |
| r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) |
| } else { |
| r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other |
| } |
| |
| if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was never set) |
| r_sched.chunk = KMP_DEFAULT_CHUNK; |
| } else { |
| r_sched.chunk = __kmp_chunk; |
| } |
| |
| return r_sched; |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| |
| /* |
| * Allocate (realloc == FALSE) or reallocate (realloc == TRUE) |
| * at least argc entries in *t_argv for the requested team. |
| */ |
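| /* Small argument lists (argc <= KMP_INLINE_ARGV_ENTRIES) reuse the inline storage |
| embedded in the team structure; larger lists are page-allocated on the heap with |
| at least KMP_MIN_MALLOC_ARGV_ENTRIES entries (or 2 * argc) to limit reallocations. */ |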
| static void |
| __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) |
| { |
| |
| KMP_DEBUG_ASSERT( team ); |
| if( !realloc || argc > team->t.t_max_argc ) { |
| |
| KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", |
| team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 )); |
| /* if previously allocated heap space for args, free them */ |
| if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) |
| __kmp_free( (void *) team->t.t_argv ); |
| |
| if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { |
| /* use unused space in the cache line for arguments */ |
| team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; |
| KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", |
| team->t.t_id, team->t.t_max_argc )); |
| team->t.t_argv = &team->t.t_inline_argv[0]; |
| if ( __kmp_storage_map ) { |
| __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], |
| &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], |
| (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), |
| "team_%d.t_inline_argv", |
| team->t.t_id ); |
| } |
| } else { |
| /* allocate space for arguments in the heap */ |
| team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? |
| KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; |
| KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", |
| team->t.t_id, team->t.t_max_argc )); |
| team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); |
| if ( __kmp_storage_map ) { |
| __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], |
| sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", |
| team->t.t_id ); |
| } |
| } |
| } |
| } |
| |
| static void |
| __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) |
| { |
| int i; |
| int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2; |
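| // A team that can never have more than one thread needs only two dispatch buffers; |
| // larger teams cycle through KMP_MAX_DISP_BUF of them. |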
| #if KMP_USE_POOLED_ALLOC |
| // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth! |
| char *ptr = __kmp_allocate(max_nth * |
| ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff |
| + sizeof(kmp_disp_t) + sizeof(int)*6 |
| //+ sizeof(int) |
| + sizeof(kmp_r_sched_t) |
| + sizeof(kmp_taskdata_t) ) ); |
| |
| team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth; |
| team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr; |
| ptr += sizeof(dispatch_shared_info_t) * num_disp_buff; |
| team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth; |
| team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_bt_set = (int*) ptr; |
| ptr += sizeof(int) * max_nth; |
| //team->t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth; |
| team->t.t_set_sched = (kmp_r_sched_t*) ptr; |
| ptr += sizeof(kmp_r_sched_t) * max_nth; |
| team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr; |
| ptr += sizeof(kmp_taskdata_t) * max_nth; |
| #else |
| |
| team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); |
| team->t.t_disp_buffer = (dispatch_shared_info_t*) |
| __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); |
| team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); |
| //team->t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth ); |
| //team->t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth ); |
| team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); |
| #endif |
| team->t.t_max_nproc = max_nth; |
| |
| /* setup dispatch buffers */ |
| for(i = 0 ; i < num_disp_buff; ++i) |
| team->t.t_disp_buffer[i].buffer_index = i; |
| } |
| |
| static void |
| __kmp_free_team_arrays(kmp_team_t *team) { |
| /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ |
| int i; |
| for ( i = 0; i < team->t.t_max_nproc; ++ i ) { |
| if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { |
| __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); |
| team->t.t_dispatch[ i ].th_disp_buffer = NULL; |
| }; // if |
| }; // for |
| __kmp_free(team->t.t_threads); |
| #if !KMP_USE_POOLED_ALLOC |
| __kmp_free(team->t.t_disp_buffer); |
| __kmp_free(team->t.t_dispatch); |
| //__kmp_free(team->t.t_set_max_active_levels); |
| //__kmp_free(team->t.t_set_sched); |
| __kmp_free(team->t.t_implicit_task_taskdata); |
| #endif |
| team->t.t_threads = NULL; |
| team->t.t_disp_buffer = NULL; |
| team->t.t_dispatch = NULL; |
| //team->t.t_set_sched = 0; |
| //team->t.t_set_max_active_levels = 0; |
| team->t.t_implicit_task_taskdata = 0; |
| } |
| |
| static void |
| __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { |
| kmp_info_t **oldThreads = team->t.t_threads; |
| |
| #if !KMP_USE_POOLED_ALLOC |
| __kmp_free(team->t.t_disp_buffer); |
| __kmp_free(team->t.t_dispatch); |
| //__kmp_free(team->t.t_set_max_active_levels); |
| //__kmp_free(team->t.t_set_sched); |
| __kmp_free(team->t.t_implicit_task_taskdata); |
| #endif |
| __kmp_allocate_team_arrays(team, max_nth); |
| |
| memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); |
| |
| __kmp_free(oldThreads); |
| } |
| |
| static kmp_internal_control_t |
| __kmp_get_global_icvs( void ) { |
| |
| kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals |
| |
| #if OMP_40_ENABLED |
| KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); |
| #endif /* OMP_40_ENABLED */ |
| |
| kmp_internal_control_t g_icvs = { |
| 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field |
| (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) |
| (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) |
| (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set |
| __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime |
| __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals |
| __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) |
| // (use a max ub on value if __kmp_parallel_initialize not called yet) |
| __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels |
| r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair |
| #if OMP_40_ENABLED |
| __kmp_nested_proc_bind.bind_types[0], |
| #endif /* OMP_40_ENABLED */ |
| NULL //struct kmp_internal_control *next; |
| }; |
| |
| return g_icvs; |
| } |
| |
| static kmp_internal_control_t |
| __kmp_get_x_global_icvs( const kmp_team_t *team ) { |
| |
| kmp_internal_control_t gx_icvs; |
| gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls |
| copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); |
| gx_icvs.next = NULL; |
| |
| return gx_icvs; |
| } |
| |
| static void |
| __kmp_initialize_root( kmp_root_t *root ) |
| { |
| int f; |
| kmp_team_t *root_team; |
| kmp_team_t *hot_team; |
| size_t disp_size, dispatch_size, bar_size; |
| int hot_team_max_nth; |
| kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals |
| kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); |
| KMP_DEBUG_ASSERT( root ); |
| KMP_ASSERT( ! root->r.r_begin ); |
| |
| /* setup the root state structure */ |
| __kmp_init_lock( &root->r.r_begin_lock ); |
| root->r.r_begin = FALSE; |
| root->r.r_active = FALSE; |
| root->r.r_in_parallel = 0; |
| root->r.r_blocktime = __kmp_dflt_blocktime; |
| root->r.r_nested = __kmp_dflt_nested; |
| |
| /* setup the root team for this task */ |
| /* allocate the root team structure */ |
| KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); |
| root_team = |
| __kmp_allocate_team( |
| root, |
| 1, // new_nproc |
| 1, // max_nproc |
| #if OMP_40_ENABLED |
| __kmp_nested_proc_bind.bind_types[0], |
| #endif |
| &r_icvs, |
| 0 // argc |
| USE_NESTED_HOT_ARG(NULL) // master thread is unknown |
| ); |
| |
| KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); |
| |
| root->r.r_root_team = root_team; |
| root_team->t.t_control_stack_top = NULL; |
| |
| /* initialize root team */ |
| root_team->t.t_threads[0] = NULL; |
| root_team->t.t_nproc = 1; |
| root_team->t.t_serialized = 1; |
| // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; |
| root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; |
| root_team->t.t_sched.chunk = r_sched.chunk; |
| KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", |
| root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); |
| |
| /* setup the hot team for this task */ |
| /* allocate the hot team structure */ |
| KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); |
| hot_team = |
| __kmp_allocate_team( |
| root, |
| 1, // new_nproc |
| __kmp_dflt_team_nth_ub * 2, // max_nproc |
| #if OMP_40_ENABLED |
| __kmp_nested_proc_bind.bind_types[0], |
| #endif |
| &r_icvs, |
| 0 // argc |
| USE_NESTED_HOT_ARG(NULL) // master thread is unknown |
| ); |
| KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); |
| |
| root->r.r_hot_team = hot_team; |
| root_team->t.t_control_stack_top = NULL; |
| |
| /* first-time initialization */ |
| hot_team->t.t_parent = root_team; |
| |
| /* initialize hot team */ |
| hot_team_max_nth = hot_team->t.t_max_nproc; |
| for ( f = 0; f < hot_team_max_nth; ++ f ) { |
| hot_team->t.t_threads[ f ] = NULL; |
| }; // for |
| hot_team->t.t_nproc = 1; |
| // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; |
| hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; |
| hot_team->t.t_sched.chunk = r_sched.chunk; |
| hot_team->t.t_size_changed = 0; |
| |
| } |
| |
| #ifdef KMP_DEBUG |
| |
| |
| typedef struct kmp_team_list_item { |
| kmp_team_p const * entry; |
| struct kmp_team_list_item * next; |
| } kmp_team_list_item_t; |
| typedef kmp_team_list_item_t * kmp_team_list_t; |
| |
| |
| static void |
| __kmp_print_structure_team_accum( // Add team to list of teams. |
| kmp_team_list_t list, // List of teams. |
| kmp_team_p const * team // Team to add. |
| ) { |
| |
| // List must terminate with item where both entry and next are NULL. |
| // Team is added to the list only once. |
| // List is sorted in ascending order by team id. |
| // Team id is *not* a key. |
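| // Ancestor teams are accumulated first via the recursive call below; the team itself |
| // is then inserted into the id-sorted list if it is not already present. |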
| |
| kmp_team_list_t l; |
| |
| KMP_DEBUG_ASSERT( list != NULL ); |
| if ( team == NULL ) { |
| return; |
| }; // if |
| |
| __kmp_print_structure_team_accum( list, team->t.t_parent ); |
|