/*
* kmp_runtime.c -- KPTS runtime support library
*/
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0
#if KMP_OS_WINDOWS
#include <process.h>
#endif
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */
char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_40_ENABLED
"4.0 (201307)";
#else
"3.1 (201107)";
#endif
#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */
#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
kmp_info_t __kmp_monitor;
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* Forward declarations */
void __kmp_cleanup( void );
static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );
#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif
static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */
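/* Lookup order (mirrors the code below): if __kmp_gtid_mode >= 3, read the */
/* thread-local __kmp_gtid (TDATA); if >= 2, use keyed TLS via */
/* __kmp_gtid_get_specific(); otherwise match the address of a local */
/* variable against the registered stack ranges of all known threads. */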
int
__kmp_get_global_thread_id( )
{
int i;
kmp_info_t **other_threads;
size_t stack_data;
char *stack_addr;
size_t stack_size;
char *stack_base;
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
__kmp_nth, __kmp_all_nth ));
/* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
parallel region, made it return KMP_GTID_DNE to force serial_initialize by
caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
__kmp_init_gtid for this to work. */
if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
#ifdef KMP_TDATA_GTID
if ( TCR_4(__kmp_gtid_mode) >= 3) {
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
return __kmp_gtid;
}
#endif
if ( TCR_4(__kmp_gtid_mode) >= 2) {
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
return __kmp_gtid_get_specific();
}
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
stack_addr = (char*) & stack_data;
other_threads = __kmp_threads;
/*
ATT: The code below is a source of potential bugs due to unsynchronized access to
__kmp_threads array. For example:
1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
2. Current thread is suspended by OS.
3. Another thread unregisters and finishes (debug versions of free() may fill memory
with something like 0xEF).
4. Current thread is resumed.
5. Current thread reads junk from *thr.
TODO: Fix it.
--ln
*/
for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
if( !thr ) continue;
stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
/* stack grows down -- search through all of the active threads */
if( stack_addr <= stack_base ) {
size_t stack_diff = stack_base - stack_addr;
if( stack_diff <= stack_size ) {
/* The only way we can be closer than the allocated */
/* stack size is if we are running on this thread. */
KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
return i;
}
}
}
/* get specific to try and determine our gtid */
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
"thread, using TLS\n" ));
i = __kmp_gtid_get_specific();
/*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
/* if we haven't been assigned a gtid, return that (negative) code */
if( i<0 ) return i;
/* dynamically updated stack window for uber threads to avoid get_specific call */
if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
KMP_FATAL( StackOverflow, i );
}
stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
if( stack_addr > stack_base ) {
TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
} else {
TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
}
/* Reprint stack bounds for ubermaster since they have been refined */
if ( __kmp_storage_map ) {
char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
__kmp_print_storage_map_gtid( i, stack_beg, stack_end,
other_threads[i]->th.th_info.ds.ds_stacksize,
"th_%d stack (refinement)", i );
}
return i;
}
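/* Like __kmp_get_global_thread_id(), but if no gtid has been assigned yet, */
/* register the calling thread as a new root, serial-initializing the */
/* library first if it is not yet initialized. */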
int
__kmp_get_global_thread_id_reg( )
{
int gtid;
if ( !__kmp_init_serial ) {
gtid = KMP_GTID_DNE;
} else
#ifdef KMP_TDATA_GTID
if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
gtid = __kmp_gtid;
} else
#endif
if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
gtid = __kmp_gtid_get_specific();
} else {
KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
gtid = __kmp_get_global_thread_id();
}
/* we must be a new uber master sibling thread */
if( gtid == KMP_GTID_DNE ) {
KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
"Registering a new gtid.\n" ));
__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
if( !__kmp_init_serial ) {
__kmp_do_serial_initialize();
gtid = __kmp_gtid_get_specific();
} else {
gtid = __kmp_register_root(FALSE);
}
__kmp_release_bootstrap_lock( &__kmp_initz_lock );
/*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
}
KMP_DEBUG_ASSERT( gtid >=0 );
return gtid;
}
/* caller must hold forkjoin_lock */
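/* If __kmp_env_checks is enabled (and this is not an uber thread), verify that */
/* this thread's stack does not overlap any other registered thread's stack; */
/* a StackOverlap fatal error is reported if it does. */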
void
__kmp_check_stack_overlap( kmp_info_t *th )
{
int f;
char *stack_beg = NULL;
char *stack_end = NULL;
int gtid;
KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
if ( __kmp_storage_map ) {
stack_end = (char *) th->th.th_info.ds.ds_stackbase;
stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
gtid = __kmp_gtid_from_thread( th );
if (gtid == KMP_GTID_MONITOR) {
__kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
"th_%s stack (%s)", "mon",
( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
} else {
__kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
"th_%d stack (%s)", gtid,
( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
}
}
/* No point in checking ubermaster threads since they use refinement and cannot overlap */
gtid = __kmp_gtid_from_thread( th );
if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
{
KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
if ( stack_beg == NULL ) {
stack_end = (char *) th->th.th_info.ds.ds_stackbase;
stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
}
for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
if( f_th && f_th != th ) {
char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
char *other_stack_beg = other_stack_end -
(size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
(stack_end > other_stack_beg && stack_end < other_stack_end)) {
/* Print the other stack values before the abort */
if ( __kmp_storage_map )
__kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
(size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
"th_%d stack (overlapped)",
__kmp_gtid_from_thread( f_th ) );
__kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
}
}
}
}
KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
}
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
void
__kmp_infinite_loop( void )
{
static int done = FALSE;
while (! done) {
KMP_YIELD( 1 );
}
}
#define MAX_MESSAGE 512
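/* Print one line of the storage map for the range [p1, p2) of the given size. */
/* The caller-supplied printf-style format (e.g. "th_%d stack (%s)") and its */
/* arguments are appended to a fixed "OMP storage map: <p1> <p2> <size>" prefix. */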
void
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
char buffer[MAX_MESSAGE];
va_list ap;
va_start( ap, format);
KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
__kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
__kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
int node;
if(gtid >= 0) {
if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
if( __kmp_storage_map_verbose ) {
node = __kmp_get_host_node(p1);
if(node < 0) /* doesn't work, so don't try this next time */
__kmp_storage_map_verbose = FALSE;
else {
char *last;
int lastNode;
int localProc = __kmp_get_cpu_from_gtid(gtid);
p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
if(localProc >= 0)
__kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
else
__kmp_printf_no_lock(" GTID %d\n", gtid);
# if KMP_USE_PRCTL
/* The more elaborate format is disabled for now because of the prctl hanging bug. */
do {
last = p1;
lastNode = node;
/* This loop collates adjacent pages with the same host node. */
do {
p1 = (char*)p1 + PAGE_SIZE;
} while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
__kmp_printf_no_lock(" %p-%p memNode %d\n", last,
(char*)p1 - 1, lastNode);
} while(p1 <= p2);
# else
__kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
(char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
if(p1 < p2) {
__kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
(char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
}
# endif
}
}
} else
__kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
}
#endif /* KMP_PRINT_DATA_PLACEMENT */
__kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}
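/* Emit a printf-style runtime warning on kmp_err, prefixed with "OMP warning:", */
/* unless warnings are disabled (__kmp_generate_warnings == kmp_warnings_off). */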
void
__kmp_warn( char const * format, ... )
{
char buffer[MAX_MESSAGE];
va_list ap;
if ( __kmp_generate_warnings == kmp_warnings_off ) {
return;
}
va_start( ap, format );
KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
__kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
__kmp_vprintf( kmp_err, buffer, ap );
__kmp_release_bootstrap_lock( & __kmp_stdio_lock );
va_end( ap );
}
void
__kmp_abort_process()
{
// Later threads may stall here, but that's ok because abort() will kill them.
__kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
if ( __kmp_debug_buf ) {
__kmp_dump_debug_buffer();
}; // if
if ( KMP_OS_WINDOWS ) {
// Let other threads know of abnormal termination and prevent deadlock
// if abort happened during library initialization or shutdown
__kmp_global.g.g_abort = SIGABRT;
/*
On Windows* OS, abort() by default causes a pop-up error box, which stalls nightly testing.
Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
works well, but this function is not available in VS7 (this is not a problem for the DLL,
but it is a problem for the static OpenMP RTL). SetErrorMode (and so the timelimit utility)
does not help, at least in some versions of the MS C RTL.
It seems the following sequence is the only way to simulate abort() and avoid the
pop-up error box.
*/
raise( SIGABRT );
_exit( 3 ); // Just in case, if signal ignored, exit anyway.
} else {
abort();
}; // if
__kmp_infinite_loop();
__kmp_release_bootstrap_lock( & __kmp_exit_lock );
} // __kmp_abort_process
void
__kmp_abort_thread( void )
{
// TODO: Eliminate g_abort global variable and this function.
// In case of abort just call abort(), it will kill all the threads.
__kmp_infinite_loop();
} // __kmp_abort_thread
/* ------------------------------------------------------------------------ */
/*
* Print out the storage map for the major kmp_info_t thread data structures
* that are allocated together.
*/
static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
__kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
__kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
"th_%d.th_info", gtid );
__kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
"th_%d.th_local", gtid );
__kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
__kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
&thr->th.th_bar[bs_plain_barrier+1],
sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
__kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
&thr->th.th_bar[bs_forkjoin_barrier+1],
sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
#if KMP_FAST_REDUCTION_BARRIER
__kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
&thr->th.th_bar[bs_reduction_barrier+1],
sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/*
* Print out the storage map for the major kmp_team_t team data structures
* that are allocated together.
*/
static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
__kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
#if KMP_FAST_REDUCTION_BARRIER
__kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER
__kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
header, team_id );
__kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
/* ------------------------------------------------------------------------ */
#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS
static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
// TODO: Change to __kmp_break_bootstrap_lock().
__kmp_init_bootstrap_lock( lck ); // make the lock released
}
static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
int i;
int thread_count;
// PROCESS_DETACH is expected to be called by a thread
// that executes ProcessExit() or FreeLibrary().
// The OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
// So it might be safe to access __kmp_threads[] without taking the forkjoin_lock.
// However, in fact, some threads may still be alive here, although they are about to be terminated.
// The threads in the array with ds_thread==0 are the most suspicious.
// So it may actually not be safe to access __kmp_threads[].
// TODO: does it make sense to check __kmp_roots[] ?
// Let's check that there are no other alive threads registered with the OMP lib.
while( 1 ) {
thread_count = 0;
for( i = 0; i < __kmp_threads_capacity; ++i ) {
if( !__kmp_threads ) continue;
kmp_info_t* th = __kmp_threads[ i ];
if( th == NULL ) continue;
int gtid = th->th.th_info.ds.ds_gtid;
if( gtid == gtid_req ) continue;
if( gtid < 0 ) continue;
DWORD exit_val;
int alive = __kmp_is_thread_alive( th, &exit_val );
if( alive ) {
++thread_count;
}
}
if( thread_count == 0 ) break; // success
}
// Assume that I'm alone.
// Now it is probably safe to check and reset the locks.
// __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
__kmp_reset_lock( &__kmp_forkjoin_lock );
#ifdef KMP_DEBUG
__kmp_reset_lock( &__kmp_stdio_lock );
#endif // KMP_DEBUG
}
BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
//__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
switch( fdwReason ) {
case DLL_PROCESS_ATTACH:
KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
return TRUE;
case DLL_PROCESS_DETACH:
KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
__kmp_gtid_get_specific() ));
if( lpReserved != NULL )
{
// lpReserved is used for telling the difference:
// lpReserved == NULL when FreeLibrary() was called,
// lpReserved != NULL when the process terminates.
// When FreeLibrary() is called, worker threads remain alive.
// So they will release the forkjoin lock by themselves.
// When the process terminates, worker threads disappear triggering
// the problem of unreleased forkjoin lock as described below.
// A worker thread can take the forkjoin lock.
// The problem comes up if that worker thread becomes dead
// before it releases the forkjoin lock.
// The forkjoin lock remains taken, while the thread
// executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
// will try to take the forkjoin lock and will always fail,
// so that the application will never finish [normally].
// This scenario is possible if __kmpc_end() has not been executed.
// It looks like this is not a corner case; common cases include:
// - the main function was compiled by an alternative compiler;
// - the main function was compiled by icl but without /Qopenmp (application with plugins);
// - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
// - alive foreign thread prevented __kmpc_end from doing cleanup.
// This is a hack to work around the problem.
// TODO: !!! to figure out something better.
__kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
}
__kmp_internal_end_library( __kmp_gtid_get_specific() );
return TRUE;
case DLL_THREAD_ATTACH:
KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
/* if we wanted to register new siblings all the time here call
* __kmp_get_gtid(); */
return TRUE;
case DLL_THREAD_DETACH:
KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
__kmp_gtid_get_specific() ));
__kmp_internal_end_thread( __kmp_gtid_get_specific() );
return TRUE;
}
return TRUE;
}
# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* ------------------------------------------------------------------------ */
/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
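/* The low bit of __kmp_yield_init encodes the current mode: even means */
/* KMP_LIBRARY=throughput, odd means turnaround (see the comments below). */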
int
__kmp_change_library( int status )
{
int old_status;
old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
if (status) {
__kmp_yield_init |= 1; // throughput => turnaround (odd init count)
}
else {
__kmp_yield_init &= ~1; // turnaround => throughput (even init count)
}
return old_status; // return previous setting of whether KMP_LIBRARY=throughput
}
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* __kmp_parallel_deo --
* Wait until it's our turn.
*/
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */
if( __kmp_env_consistency_check ) {
if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
__kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
}
#ifdef BUILD_PARALLEL_ORDERED
if( !team->t.t_serialized ) {
KMP_MB();
KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
KMP_MB();
}
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo --
* Signal the next task.
*/
void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
int tid = __kmp_tid_from_gtid( gtid );
kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */
if( __kmp_env_consistency_check ) {
if( __kmp_threads[gtid]->th.th_root->r.r_active )
__kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
}
#ifdef BUILD_PARALLEL_ORDERED
if ( ! team->t.t_serialized ) {
KMP_MB(); /* Flush all pending memory write invalidates. */
/* use the tid of the next thread in this team */
/* TODO: replace with a general release procedure */
team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
/* accept blame for "ordered" waiting */
kmp_info_t *this_thread = __kmp_threads[gtid];
ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
this_thread->th.ompt_thread_info.wait_id);
}
#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
}
#endif /* BUILD_PARALLEL_ORDERED */
}
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */
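/* Returns nonzero if the calling thread should execute the SINGLE block. In a */
/* serialized team the caller always wins; otherwise the winner is elected by a */
/* compare-and-store on team->t.t_construct against this thread's construct count. */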
int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
int status;
kmp_info_t *th;
kmp_team_t *team;
if( ! TCR_4(__kmp_init_parallel) )
__kmp_parallel_initialize();
th = __kmp_threads[ gtid ];
team = th->th.th_team;
status = 0;
th->th.th_ident = id_ref;
if ( team->t.t_serialized ) {
status = 1;
} else {
kmp_int32 old_this = th->th.th_local.this_construct;
++th->th.th_local.this_construct;
/* try to set team count to thread count--success means thread got the
single block
*/
/* TODO: Should this be acquire or release? */
if (team->t.t_construct == old_this) {
status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
th->th.th_local.this_construct);
}
#if USE_ITT_BUILD
if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
th->th.th_teams_microtask == NULL &&
#endif
team->t.t_active_level == 1 )
{ // Only report metadata by master of active team at level 1
__kmp_itt_metadata_single( id_ref );
}
#endif /* USE_ITT_BUILD */
}
if( __kmp_env_consistency_check ) {
if (status && push_ws) {
__kmp_push_workshare( gtid, ct_psingle, id_ref );
} else {
__kmp_check_workshare( gtid, ct_psingle, id_ref );
}
}
#if USE_ITT_BUILD
if ( status ) {
__kmp_itt_single_start( gtid );
}
#endif /* USE_ITT_BUILD */
return status;
}
void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
__kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
if( __kmp_env_consistency_check )
__kmp_pop_workshare( gtid, ct_psingle, NULL );
}
/*
* determine if we can go parallel or must use a serialized parallel region and
* how many threads we can use
* set_nproc is the number of threads requested for the team
* returns 1 if we should serialize or only use one thread,
* otherwise the number of threads to use
* The forkjoin lock is held by the caller.
*/
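/*
 * When dyn-var is set, the request is first adjusted according to
 * __kmp_global.g.g_dynamic_mode (load balance, thread limit, or random), then
 * clipped to KMP_ALL_THREADS/OMP_THREAD_LIMIT (__kmp_max_nth) and to whatever
 * fits in the __kmp_threads[] array after an expansion attempt.
 */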
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
int master_tid, int set_nthreads
#if OMP_40_ENABLED
, int enter_teams
#endif /* OMP_40_ENABLED */
)
{
int capacity;
int new_nthreads;
KMP_DEBUG_ASSERT( __kmp_init_serial );
KMP_DEBUG_ASSERT( root && parent_team );
//
// If dyn-var is set, dynamically adjust the number of desired threads,
// according to the method specified by dynamic_mode.
//
new_nthreads = set_nthreads;
if ( ! get__dynamic_2( parent_team, master_tid ) ) {
;
}
#ifdef USE_LOAD_BALANCE
else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
if ( new_nthreads == 1 ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
master_tid ));
return 1;
}
if ( new_nthreads < set_nthreads ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
master_tid, new_nthreads ));
}
}
#endif /* USE_LOAD_BALANCE */
else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
: root->r.r_hot_team->t.t_nproc);
if ( new_nthreads <= 1 ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
master_tid ));
return 1;
}
if ( new_nthreads < set_nthreads ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
master_tid, new_nthreads ));
}
else {
new_nthreads = set_nthreads;
}
}
else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
if ( set_nthreads > 2 ) {
new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
new_nthreads = ( new_nthreads % set_nthreads ) + 1;
if ( new_nthreads == 1 ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
master_tid ));
return 1;
}
if ( new_nthreads < set_nthreads ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
master_tid, new_nthreads ));
}
}
}
else {
KMP_ASSERT( 0 );
}
//
// Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
//
if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
root->r.r_hot_team->t.t_nproc );
if ( tl_nthreads <= 0 ) {
tl_nthreads = 1;
}
//
// If dyn-var is false, emit a 1-time warning.
//
if ( ! get__dynamic_2( parent_team, master_tid )
&& ( ! __kmp_reserve_warn ) ) {
__kmp_reserve_warn = 1;
__kmp_msg(
kmp_ms_warning,
KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
KMP_HNT( Unset_ALL_THREADS ),
__kmp_msg_null
);
}
if ( tl_nthreads == 1 ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
master_tid ));
return 1;
}
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
master_tid, tl_nthreads ));
new_nthreads = tl_nthreads;
}
//
// Check if the threads array is large enough, or needs expanding.
//
// See comment in __kmp_register_root() about the adjustment if
// __kmp_threads[0] == NULL.
//
capacity = __kmp_threads_capacity;
if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
--capacity;
}
if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
root->r.r_hot_team->t.t_nproc ) > capacity ) {
//
// Expand the threads array.
//
int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
root->r.r_hot_team->t.t_nproc ) - capacity;
int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
if ( slotsAdded < slotsRequired ) {
//
// The threads array was not expanded enough.
//
new_nthreads -= ( slotsRequired - slotsAdded );
KMP_ASSERT( new_nthreads >= 1 );
//
// If dyn-var is false, emit a 1-time warning.
//
if ( ! get__dynamic_2( parent_team, master_tid )
&& ( ! __kmp_reserve_warn ) ) {
__kmp_reserve_warn = 1;
if ( __kmp_tp_cached ) {
__kmp_msg(
kmp_ms_warning,
KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
KMP_HNT( PossibleSystemLimitOnThreads ),
__kmp_msg_null
);
}
else {
__kmp_msg(
kmp_ms_warning,
KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
KMP_HNT( SystemLimitOnThreads ),
__kmp_msg_null
);
}
}
}
}
if ( new_nthreads == 1 ) {
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
__kmp_get_gtid(), set_nthreads ) );
return 1;
}
KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
__kmp_get_gtid(), new_nthreads, set_nthreads ));
return new_nthreads;
}
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within the forkjoin critical section */
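/* If the team is the (possibly nested) hot team, its workers are already in place; */
/* otherwise worker threads are forked or taken from the pool, installed into */
/* team->t.t_threads[], and their barrier arrived counters are aligned with the team's. */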
static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
kmp_info_t *master_th, int master_gtid )
{
int i;
int use_hot_team;
KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
KMP_MB();
/* first, let's setup the master thread */
master_th->th.th_info.ds.ds_tid = 0;
master_th->th.th_team = team;
master_th->th.th_team_nproc = team->t.t_nproc;
master_th->th.th_team_master = master_th;
master_th->th.th_team_serialized = FALSE;
master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ];
/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
use_hot_team = 0;
kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
int level = team->t.t_active_level - 1; // index in array of hot teams
if( master_th->th.th_teams_microtask ) { // are we inside the teams?
if( master_th->th.th_teams_size.nteams > 1 ) {
++level; // level was not increased in teams construct for team_of_masters
}
if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
master_th->th.th_teams_level == team->t.t_level ) {
++level; // level was not increased in teams construct for team_of_workers before the parallel
} // team->t.t_level will be increased inside parallel
}
if( level < __kmp_hot_teams_max_level ) {
if( hot_teams[level].hot_team ) {
// hot team has already been allocated for given level
KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
use_hot_team = 1; // the team is ready to use
} else {
use_hot_team = 0; // AC: threads are not allocated yet
hot_teams[level].hot_team = team; // remember new hot team
hot_teams[level].hot_team_nth = team->t.t_nproc;
}
} else {
use_hot_team = 0;
}
}
#else
use_hot_team = team == root->r.r_hot_team;
#endif
if ( !use_hot_team ) {
/* install the master thread */
team->t.t_threads[ 0 ] = master_th;
__kmp_initialize_info( master_th, team, 0, master_gtid );
/* now, install the worker threads */
for ( i=1 ; i < team->t.t_nproc ; i++ ) {
/* fork or reallocate a new thread and install it in team */
kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
team->t.t_threads[ i ] = thr;
KMP_DEBUG_ASSERT( thr );
KMP_DEBUG_ASSERT( thr->th.th_team == team );
/* align team and thread arrived states */
KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
__kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
__kmp_gtid_from_tid( i, team ), team->t.t_id, i,
team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
thr->th.th_teams_level = master_th->th.th_teams_level;
thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
{ // Initialize threads' barrier data.
int b;
kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
for ( b = 0; b < bs_last_barrier; ++ b ) {
balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
}; // for b
}
}
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
__kmp_partition_places( team );
#endif
}
KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team.
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
if ( __kmp_inherit_fp_control ) {
kmp_int16 x87_fpu_control_word;
kmp_uint32 mxcsr;
// Get master values of FPU control flags (both X87 and vector)
__kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
__kmp_store_mxcsr( &mxcsr );
mxcsr &= KMP_X86_MXCSR_MASK;
// There is no point looking at t_fp_control_saved here.
// If it is TRUE, we still have to update the values if they are different from those we now have.
// If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
// that the values in the team are the same as those we have.
// So, this code achieves what we need whether or not t_fp_control_saved is true.
// By checking whether the value needs updating we avoid unnecessary writes that would put the
// cache-line into a written state, causing all threads in the team to have to read it again.
KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
// Although we don't use this value, other code in the runtime wants to know whether it should restore them.
// So we must ensure it is correct.
KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
}
else {
// Similarly here. Don't write to this cache-line in the team structure unless we have to.
KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
}
}
// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
//
// Only reset the fp control regs if they have been changed in the team
// by the parallel region that we are exiting.
//
kmp_int16 x87_fpu_control_word;
kmp_uint32 mxcsr;
__kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
__kmp_store_mxcsr( &mxcsr );
mxcsr &= KMP_X86_MXCSR_MASK;
if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
__kmp_clear_x87_fpu_status_word();
__kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
}
if ( team->t.t_mxcsr != mxcsr ) {
__kmp_load_mxcsr( &team->t.t_mxcsr );
}
}
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
/*
* Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
*/
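/*
 * The thread's cached th_serial_team is reused when possible; nested serialized
 * regions simply bump serial_team->t.t_serialized and push another dispatch
 * buffer rather than allocating a new team.
 */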
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
kmp_info_t *this_thr;
kmp_team_t *serial_team;
KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
/* Skip all this code for autopar serialized loops since it results in
unacceptable overhead */
if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
return;
if( ! TCR_4( __kmp_init_parallel ) )
__kmp_parallel_initialize();
this_thr = __kmp_threads[ global_tid ];
serial_team = this_thr->th.th_serial_team;
/* utilize the serialized team held by this thread */
KMP_DEBUG_ASSERT( serial_team );
KMP_MB();
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
this_thr->th.th_task_team = NULL;
}
#if OMP_40_ENABLED
kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
proc_bind = proc_bind_false;
}
else if ( proc_bind == proc_bind_default ) {
//
// No proc_bind clause was specified, so use the current value
// of proc-bind-var for this parallel region.
//
proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
}
//
// Reset for next parallel region
//
this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */
if( this_thr->th.th_team != serial_team ) {
// Nested level will be an index in the nested nthreads array
int level = this_thr->th.th_team->t.t_level;
if( serial_team->t.t_serialized ) {
/* this serial team was already used
* TODO: increase performance by making these locks more specific */
kmp_team_t *new_team;
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif
new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
& this_thr->th.th_current_task->td_icvs,
0 USE_NESTED_HOT_ARG(NULL) );
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
KMP_ASSERT( new_team );
/* setup new serialized team and install it */
new_team->t.t_threads[0] = this_thr;
new_team->t.t_parent = this_thr->th.th_team;
serial_team = new_team;
this_thr->th.th_serial_team = serial_team;
KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
global_tid, serial_team ) );
/* TODO the above breaks the requirement that if we run out of
* resources, then we can still guarantee that serialized teams
* are ok, since we may need to allocate a new one */
} else {
KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
global_tid, serial_team ) );
}
/* we have to initialize this serial team */
KMP_DEBUG_ASSERT( serial_team->t.t_threads );
KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
serial_team->t.t_ident = loc;
serial_team->t.t_serialized = 1;
serial_team->t.t_nproc = 1;
serial_team->t.t_parent = this_thr->th.th_team;
serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
this_thr->th.th_team = serial_team;
serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
global_tid, this_thr->th.th_current_task ) );
KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
this_thr->th.th_current_task->td_flags.executing = 0;
__kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
/* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
each serialized task represented by team->t.t_serialized? */
copy_icvs(
& this_thr->th.th_current_task->td_icvs,
& this_thr->th.th_current_task->td_parent->td_icvs );
// Thread value exists in the nested nthreads array for the next nested level
if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
}
#if OMP_40_ENABLED
if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
this_thr->th.th_current_task->td_icvs.proc_bind
= __kmp_nested_proc_bind.bind_types[ level + 1 ];
}
#endif /* OMP_40_ENABLED */
#if USE_DEBUGGER
serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
this_thr->th.th_info.ds.ds_tid = 0;
/* set thread cache values */
this_thr->th.th_team_nproc = 1;
this_thr->th.th_team_master = this_thr;
this_thr->th.th_team_serialized = 1;
serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
propagateFPControl (serial_team);
/* check if we need to allocate dispatch buffers stack */
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
__kmp_allocate( sizeof( dispatch_private_info_t ) );
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
__ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif
KMP_MB();
} else {
/* this serialized team is already being used,
* that's fine, just add another nested level */
KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
KMP_DEBUG_ASSERT( serial_team->t.t_threads );
KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
++ serial_team->t.t_serialized;
this_thr->th.th_team_serialized = serial_team->t.t_serialized;
// Nested level will be an index in the nested nthreads array
int level = this_thr->th.th_team->t.t_level;
// Thread value exists in the nested nthreads array for the next nested level
if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
}
serial_team->t.t_level++;
KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
global_tid, serial_team, serial_team->t.t_level ) );
/* allocate/push dispatch buffers stack */
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
{
dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
__kmp_allocate( sizeof( dispatch_private_info_t ) );
disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
KMP_MB();
}
if ( __kmp_env_consistency_check )
__kmp_push_parallel( global_tid, NULL );
}
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
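/* The number of threads is determined via __kmp_reserve_threads() under the */
/* forkjoin lock; if only one thread can be used, the region is serialized */
/* (__kmpc_serialized_parallel) and the microtask is invoked directly on the master. */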
int
__kmp_fork_call(
ident_t * loc,
int gtid,
enum fork_context_e call_context, // Intel, GNU, ...
kmp_int32 argc,
#if OMPT_SUPPORT
void *unwrapped_task,
#endif
microtask_t microtask,
launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
va_list * ap
#else
va_list ap
#endif
)
{
void **argv;
int i;
int master_tid;
int master_this_cons;
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
kmp_root_t *root;
int nthreads;
int master_active;
int master_set_numthreads;
int level;
#if OMP_40_ENABLED
int active_level;
int teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
kmp_hot_team_ptr_t **p_hot_teams;
#endif
{ // KMP_TIME_BLOCK
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
/* Some systems prefer the stack for the root thread(s) to start with */
/* some gap from the parent stack to prevent false sharing. */
void *dummy = KMP_ALLOCA(__kmp_stkpadding);
/* These 2 lines below are so this does not get optimized out */
if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
__kmp_stkpadding += (short)((kmp_int64)dummy);
}
/* initialize if needed */
KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
if( ! TCR_4(__kmp_init_parallel) )
__kmp_parallel_initialize();
/* setup current data */
master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
parent_team = master_th->th.th_team;
master_tid = master_th->th.th_info.ds.ds_tid;
master_this_cons = master_th->th.th_local.this_construct;
root = master_th->th.th_root;
master_active = root->r.r_active;
master_set_numthreads = master_th->th.th_set_nproc;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id;
ompt_task_id_t ompt_task_id;
ompt_frame_t *ompt_frame;
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
if (ompt_enabled) {
ompt_parallel_id = __ompt_parallel_id_new(gtid);
ompt_task_id = __ompt_get_task_id_internal(0);
ompt_frame = __ompt_get_task_frame_internal(0);
}
#endif
// Nested level will be an index in the nested nthreads array
level = parent_team->t.t_level;
active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
#if OMP_40_ENABLED
teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
#endif
#if KMP_NESTED_HOT_TEAMS
p_hot_teams = &master_th->th.th_hot_teams;
if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
*p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
(*p_hot_teams)[0].hot_team = root->r.r_hot_team;
(*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
}
#endif
#if OMPT_SUPPORT
if (ompt_enabled &&
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
int team_size = master_set_numthreads;
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
ompt_task_id, ompt_frame, ompt_parallel_id,
team_size, unwrapped_task, OMPT_INVOKER(call_context));
}
#endif
master_th->th.th_ident = loc;
#if OMP_40_ENABLED
if ( master_th->th.th_teams_microtask &&
ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
// AC: This is start of parallel that is nested inside teams construct.
// The team is actual (hot), all workers are ready at the fork barrier.
// No lock needed to initialize the team a bit, then free workers.
parent_team->t.t_ident = loc;
__kmp_alloc_argv_entries( argc, parent_team, TRUE );
parent_team->t.t_argc = argc;
argv = (void**)parent_team->t.t_argv;
for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
#endif
/* Increment our nested depth level, but do not increase the serialization */
if ( parent_team == master_th->th.th_serial_team ) {
// AC: we are in serialized parallel
__kmpc_serialized_parallel(loc, gtid);
KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
parent_team->t.t_serialized--; // AC: need this so that enquiry functions
// work correctly; will restore at join time
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
}
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
}
__ompt_lw_taskteam_unlink(master_th);
// clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id,
OMPT_INVOKER(call_context));
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
return TRUE;
}
parent_team->t.t_pkfn = microtask;
#if OMPT_SUPPORT
parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
parent_team->t.t_invoke = invoker;
KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
parent_team->t.t_active_level ++;
parent_team->t.t_level ++;
/* Change number of threads in the team if requested */
if ( master_set_numthreads ) { // The parallel has num_threads clause
if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
// AC: only can reduce the number of threads dynamically, cannot increase
kmp_info_t **other_threads = parent_team->t.t_threads;
parent_team->t.t_nproc = master_set_numthreads;
for ( i = 0; i < master_set_numthreads; ++i ) {
other_threads[i]->th.th_team_nproc = master_set_numthreads;
}
// Keep extra threads hot in the team for possible next parallels
}
master_th->th.th_set_nproc = 0;
}
#if USE_DEBUGGER
if ( __kmp_debugging ) { // Let debugger override number of threads.
int nth = __kmp_omp_num_threads( loc );
if ( nth > 0 ) { // 0 means debugger does not want to change number of threads.
master_set_numthreads = nth;
}; // if
}; // if
#endif
KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
__kmp_internal_fork( loc, gtid, parent_team );
KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
/* Invoke microtask for MASTER thread */
KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
if (! parent_team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
}
KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
KMP_MB(); /* Flush all pending memory write invalidates. */
KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
return TRUE;
} // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
}
#endif
if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
nthreads = 1;
} else {
#if OMP_40_ENABLED
int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
#endif
nthreads = master_set_numthreads ?
master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
// Check if we need to take the forkjoin lock (no need for a serialized parallel outside of a teams construct).
// This code was moved here from __kmp_reserve_threads() to speed up nested serialized parallels.
if (nthreads > 1) {
if ( ( !get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
&& !enter_teams
#endif /* OMP_40_ENABLED */
) ) || ( __kmp_library == library_serial ) ) {
KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
gtid, nthreads ));
nthreads = 1;
}
}
if ( nthreads > 1 ) {
/* determine how many new threads we can use */
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
/* AC: If we execute teams from a parallel region (on the host), then the teams should be created
but each can only have 1 thread if nesting is disabled. If teams is called from a serial region,
then the teams and their threads should be created regardless of the nesting setting. */
, enter_teams
#endif /* OMP_40_ENABLED */
);
if ( nthreads == 1 ) {
// Free lock for single thread execution here;
// for multi-thread execution it will be freed later
// after team of threads created and initialized
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
}
}
}
KMP_DEBUG_ASSERT( nthreads > 0 );
/* If we temporarily changed the set number of threads then restore it now */
master_th->th.th_set_nproc = 0;
/* create a serialized parallel region? */
if ( nthreads == 1 ) {
/* josh todo: hypothetical question: what do we do for OS X*? */
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
void * args[ argc ];
#else
void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
__kmpc_serialized_parallel(loc, gtid);
if ( call_context == fork_context_intel ) {
/* TODO this sucks, use the compiler itself to pass args! :) */
master_th->th.th_serial_team->t.t_ident = loc;
#if OMP_40_ENABLED
if ( !ap ) {
// revert change made in __kmpc_serialized_parallel()
master_th->th.th_serial_team->t.t_level--;
// Get args from parent team for teams construct
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
my_task_id = lw_taskteam.ompt_task_info.task_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
ompt_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
}
#if OMPT_SUPPORT
if (ompt_enabled) {
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
#if OMPT_TRACE
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
}
#endif
__ompt_lw_taskteam_unlink(master_th);
// clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id,
OMPT_INVOKER(call_context));
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
} else if ( microtask == (microtask_t)__kmp_teams_master ) {
KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
team = master_th->th.th_team;
//team->t.t_pkfn = microtask;
team->t.t_invoke = invoker;
__kmp_alloc_argv_entries( argc, team, TRUE );
team->t.t_argc = argc;
argv = (void**) team->t.t_argv;
if ( ap ) {
for( i=argc-1; i >= 0; --i )
// TODO: revert workaround for Intel(R) 64 tracker #96
# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
# else
*argv++ = va_arg( ap, void * );
# endif
} else {
for( i=0; i < argc; ++i )
// Get args from parent team for teams construct
argv[i] = parent_team->t.t_argv[i];
}
// AC: revert change made in __kmpc_serialized_parallel()
// because initial code in teams should have level=0
team->t.t_level--;
// AC: call special invoker for outer "parallel" of the teams construct
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
invoker(gtid);
}
} else {
#endif /* OMP_40_ENABLED */
argv = args;
for( i=argc-1; i >= 0; --i )
// TODO: revert workaround for Intel(R) 64 tracker #96
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
#endif
KMP_MB();
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = ompt_parallel_id;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
}
#if OMPT_SUPPORT
if (ompt_enabled) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
my_parallel_id, my_task_id);
}
#endif
__ompt_lw_taskteam_unlink(master_th);
// Reset (clear) the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id,
OMPT_INVOKER(call_context));
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
#if OMP_40_ENABLED
}
#endif /* OMP_40_ENABLED */
}
else if ( call_context == fork_context_gnu ) {
#if OMPT_SUPPORT
ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
lwt->ompt_task_info.frame.exit_runtime_frame = 0;
__ompt_lw_taskteam_link(lwt, master_th);
#endif
// we were called from GNU native code
KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
return FALSE;
}
else {
KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" );
}
KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
KMP_MB();
return FALSE;
}
// GEH: only modify the executing flag when the region is not serialized;
// the serialized case is handled in __kmpc_serialized_parallel()
KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
master_th->th.th_current_task->td_icvs.max_active_levels ) );
// TODO: GEH - cannot do this assertion because root thread not set up as executing
// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
master_th->th.th_current_task->td_flags.executing = 0;
#if OMP_40_ENABLED
if ( !master_th->th.th_teams_microtask || level > teams_level )
#endif /* OMP_40_ENABLED */
{
/* Increment our nested depth level */
KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
}
// See if we need to make a copy of the ICVs.
int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
nthreads_icv = __kmp_nested_nth.nth[level+1];
}
else {
nthreads_icv = 0; // don't update
}
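// Example (hedged, for illustration only): with OMP_NUM_THREADS=4,2 a top-level
// parallel region (level == 0) picks up __kmp_nested_nth.nth[1] == 2 here, so
// the new team's implicit tasks inherit an nproc ICV of 2 and any parallel
// region they fork defaults to 2 threads; nthreads_icv == 0 means the
// inherited value is left unchanged.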
#if OMP_40_ENABLED
// Figure out the proc_bind_policy for the new team.
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update
if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
proc_bind = proc_bind_false;
}
else {
if (proc_bind == proc_bind_default) {
// No proc_bind clause specified; use current proc-bind-var for this parallel region
proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
}
/* else: The proc_bind policy was specified explicitly on parallel clause. This
overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */
// Figure the value of proc-bind-var for the child threads.
if ((level+1 < __kmp_nested_proc_bind.used)
&& (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];
}
}
// Reset for next parallel region
master_th->th.th_set_proc_bind = proc_bind_default;
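// Example (hedged, for illustration only): with OMP_PROC_BIND=spread,close and
// no proc_bind clause on this parallel directive, proc_bind above resolves to
// the current proc-bind-var (spread) for this region, while proc_bind_icv picks
// up 'close' from __kmp_nested_proc_bind.bind_types[level+1] for the children.
// An explicit proc_bind clause would override the policy for this region only.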
#endif /* OMP_40_ENABLED */
if ((nthreads_icv > 0)
#if OMP_40_ENABLED
|| (proc_bind_icv != proc_bind_default)
#endif /* OMP_40_ENABLED */
) {
kmp_internal_control_t new_icvs;
copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
new_icvs.next = NULL;
if (nthreads_icv > 0) {
new_icvs.nproc = nthreads_icv;
}
#if OMP_40_ENABLED
if (proc_bind_icv != proc_bind_default) {
new_icvs.proc_bind = proc_bind_icv;
}
#endif /* OMP_40_ENABLED */
/* allocate a new parallel team */
KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
&new_icvs, argc USE_NESTED_HOT_ARG(master_th) );
} else {
/* allocate a new parallel team */
KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
&master_th->th.th_current_task->td_icvs, argc
USE_NESTED_HOT_ARG(master_th) );
}
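// Note (descriptive, added for clarity): the two __kmp_allocate_team() calls
// above differ only in which ICV set seeds the new team -- the freshly copied
// 'new_icvs' when the nthreads or proc_bind ICV had to be overridden, otherwise
// the master's current task ICVs unchanged.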
KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
/* setup the new team */
KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
KMP_CHECK_UPDATE(team->t.t_ident, loc);
KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
#endif
KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */
// TODO: parent_team->t.t_level == INT_MAX ???
#if OMP_40_ENABLED
if ( !master_th->th.th_teams_microtask || level > teams_level ) {
#endif /* OMP_40_ENABLED */
int new_level = parent_team->t.t_level + 1;
KMP_CHECK_UPDATE(team->t.t_level, new_level);
new_level = parent_team->t.t_active_level + 1;
KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
#if OMP_40_ENABLED
} else {
// AC: Do not increase parallel level at start of the teams construct
int new_level = parent_team->t.t_level;
KMP_CHECK_UPDATE(team->t.t_level, new_level);
new_level = parent_team->t.t_active_level;
KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
}
#endif /* OMP_40_ENABLED */
kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
#if OMP_40_ENABLED
KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
#endif
// Update the floating point rounding in the team if required.
propagateFPControl(team);
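// Note (hedged): propagateFPControl() is expected to capture the master's
// floating-point control state (x87 control word / MXCSR where applicable) in
// the team when FP-control inheritance is enabled, so that updateHWFPControl()
// can reapply it at join time.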
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Set master's task team to team's task team. Unless this is hot team, it should be NULL.
#if 0
// Patch out an assertion that trips while the runtime seems to operate correctly.
// Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch.
KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
#endif
KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );
if ( active_level || master_th->th.th_task_team ) {
// Save the master's task_state on the memo stack
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size
kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
kmp_uint8 *old_stack, *new_stack;
kmp_uint32 i;
new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
new_stack[i] = master_th->th.th_task_state_memo_stack[i];
}
for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack
new_stack[i] = 0;
}
old_stack = master_th->th.th_task_state_memo_stack;
master_th->th.th_task_state_memo_stack = new_stack;
master_th->th.th_task_state_stack_sz = new_size;
__kmp_free(old_stack);
}
// Store master's task_state on stack
master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team
master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
else {
#endif
master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
}
#endif
}
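// Note (descriptive, added for clarity): when the branch above was taken, the
// master's previous th_task_state has been pushed onto the memo stack and
// th_task_state now matches the team it is about to run -- restored from the
// memo stack for a nested hot team, or reset to 0 for a newly allocated team.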
#if !KMP_NESTED_HOT_TEAMS
KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif
}
KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
( team->t.t_master_tid == 0 &&
( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
KMP_MB();
/* now, setup the arguments */
argv = (void**)team->t.t_argv;
#if OMP_40_ENABLED
if ( ap ) {
#endif /* OMP_40_ENABLED */
for ( i=argc-1; i >= 0; --i ) {
// TODO: revert workaround for Intel(R) 64 tracker #96
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
void *new_argv = va_arg(*ap, void *);
#else
void *new_argv = va_arg(ap, void *);
#endif
KMP_CHECK_UPDATE(*argv, new_argv);
argv++;
}
#if OMP_40_ENABLED
} else {
for ( i=0; i < argc; ++i ) {
// Get args from parent team for teams construct
KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
}
}
#endif /* OMP_40_ENABLED */
/* now actually fork the threads */
KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
root->r.r_active = TRUE;
__kmp_fork_team_threads( root, team, master_th, gtid );
__kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
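// Note (hedged): __kmp_setup_icv_copy() stages the master's ICVs so that the
// worker threads can copy them while passing through the fork barrier, rather
// than the master writing every worker's ICVs itself.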
#if OMPT_SUPPORT
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
#if USE_ITT_BUILD
if ( team->t.t_active_level == 1 // only report frames at level 1
# if OMP_40_ENABLED
&& !master_th->th.th_teams_microtask // not in teams construct
# endif /* OMP_40_ENABLED */
) {
#if USE_ITT_NOTIFY
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
{
kmp_uint64 tmp_time = 0;
if ( __itt_get_timestamp_ptr )
tmp_time = __itt_get_timestamp();
// Internal fork - report frame begin
master_th->th.th_frame_time = tmp_time;
if ( __kmp_forkjoin_frames_mode == 3 )
team->t.t_region_time = tmp_time;
} else // only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
__kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
{ // Mark start of "parallel" region for VTune.
__kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
}
}
#endif /* USE_ITT_BUILD */
/* now go on and do the work */
KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
KMP_MB();
KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
root, team, master_th, gtid));
#if USE_ITT_BUILD
if ( __itt_stack_caller_create_ptr ) {
team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
}
#endif /* USE_ITT_BUILD */
#if OMP_40_ENABLED
if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
#endif /* OMP_40_ENABLED */
{
__kmp_internal_fork( loc, gtid, team );
KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
root, team, master_th, gtid));
}
if (call_context == fork_context_gnu) {
KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
return TRUE;
}
/* Invoke microtask for MASTER thread */
KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
gtid, team->t.t_id, team->t.t_pkfn ) );
} // END of timer KMP_fork_call block
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
}
KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
gtid, team->t.t_id, team->t.t_pkfn ) );
KMP_MB(); /* Flush all pending memory write invalidates. */
KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
#if OMPT_SUPPORT
if (ompt_enabled) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
return TRUE;
}
#if OMPT_SUPPORT
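// Join-time OMPT helpers: report the parallel-end event to a registered tool
// callback and restore the thread state to work_serial or work_parallel,
// depending on whether the enclosing team is serialized.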
static inline void
__kmp_join_restore_state(
kmp_info_t *thread,
kmp_team_t *team)
{
// restore state outside the region
thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
ompt_state_work_serial : ompt_state_work_parallel);
}
static inline void
__kmp_join_ompt(
kmp_info_t *thread,
kmp_team_t *team,
ompt_parallel_id_t parallel_id,
fork_context_e fork_context)
{
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
}
__kmp_join_restore_state(thread,team);
}
#endif
void
__kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
, enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
, int exit_teams
#endif /* OMP_40_ENABLED */
)
{
KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
kmp_root_t *root;
int master_active;
int i;
KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
/* setup current data */
master_th = __kmp_threads[ gtid ];
root = master_th->th.th_root;
team = master_th->th.th_team;
parent_team = team->t.t_parent;
master_th->th.th_ident = loc;
#if OMPT_SUPPORT
if (ompt_enabled) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
__kmp_gtid_from_thread( master_th ), team,
team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );
}
#endif
if( team->t.t_serialized ) {
#if OMP_40_ENABLED
if ( master_th->th.th_teams_microtask ) {
// We are in teams construct
int level = team->t.t_level;
int tlevel = master_th->th.th_teams_level;
if ( level == tlevel ) {
// AC: we haven't incremented it earlier at start of teams construct,
// so do it here - at the end of teams construct
team->t.t_level++;
} else if ( level == tlevel + 1 ) {
// AC: we are exiting parallel inside teams, need to increment serialization
// in order to restore it in the next call to __kmpc_end_serialized_parallel
team->t.t_serialized++;
}
}
#endif /* OMP_40_ENABLED */
__kmpc_end_serialized_parallel( loc, gtid );
#if OMPT_SUPPORT
if (ompt_enabled) {
__kmp_join_restore_state(master_th, parent_team);
}
#endif
return;
}
master_active = team->t.t_master_active;
#if OMP_40_ENABLED
if (!exit_teams)
#endif /* OMP_40_ENABLED */
{
// AC: No barrier for inner teams at exit from the teams construct,
// but there is a barrier for the outer team (the league).
__kmp_internal_join( loc, gtid, team );
}
#if OMP_40_ENABLED
else {
master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel)
}
#endif /* OMP_40_ENABLED */
KMP_MB();
#if OMPT_SUPPORT
ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
#endif
#if USE_ITT_BUILD
if ( __itt_stack_caller_create_ptr ) {
__kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
}
// Mark end of "parallel" region for VTune.
if ( team->t.t_active_level == 1
# if OMP_40_ENABLED
&& !master_th->th.th_teams_microtask /* not in teams construct */
# endif /* OMP_40_ENABLED */
) {
master_th->th.th_ident = loc;
// only one notification scheme (either "submit" or "forking/joined", not both)
if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
__kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
0, loc, master_th->th.th_team_nproc, 1 );
else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
__kmp_itt_region_joined( gtid );
} // active_level == 1
#endif /* USE_ITT_BUILD */
#if OMP_40_ENABLED
if ( master_th->th.th_teams_microtask &&
!exit_teams &&
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
team->t.t_level == master_th->th.th_teams_level + 1 ) {
// AC: Leave the team structure intact at the end of a parallel region
// inside the teams construct, so that the next parallel region reuses
// the same (hot) team; only adjust the nesting levels here.
/* Decrement our nested depth level */
team->t.t_level--;
team->t.t_active_level--;
KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
/* Restore number of threads in the team if needed */
if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
int old_num = master_th->th.th_team_nproc;
int new_num = master_th->th.th_teams_size.nth;
kmp_info_t **other_threads = team->t.t_threads;
team->t.t_nproc = new_num;
for ( i = 0; i < old_num; ++i ) {
other_threads[i]->th.th_team_nproc = new_num;
}
// Adjust the states of the threads that were not used by the just-ended parallel region
for ( i = old_num; i < new_num; ++i ) {
// Re-initialize thread's barrier data.
int b;
kmp_balign_t * balign = other_threads[i]->th.th_bar;
for ( b = 0; b < bs_last_barrier; ++ b ) {
balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
}
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// Synchronize thread's task state
other_threads[i]->th.th_task_state = master_th->th.th_task_state;
}
}
}
#if OMPT_SUPPORT
if (ompt_enabled) {
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
}
#endif
return;
}
#endif /* OMP_40_ENABLED */
/* do cleanup and restore the parent team */
master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
master_th->th.th_local.this_construct = team->t.t_master_this_cons;
master_th->th.th_dispatch =
& parent_team->t.t_dispatch[ team->t.t_master_tid ];
/* jc: The following lock has instructions with REL and ACQ semantics,
separating the parallel user code called in this parallel region
from the serial user code called after this function returns.
*/
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
#if OMP_40_ENABLED
if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )
#endif /* OMP_40_ENABLED */
{
/* Decrement our nested depth level */
KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
}
KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
parallel_id, task_info->task_id);
}
task_info->frame.exit_runtime_frame = 0;
task_info->task_id = 0;
}
#endif
KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
0, master_th, team ) );
__kmp_pop_current_task_from_thread( master_th );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
//
// Restore master thread's partition.
//
master_th->th.th_first_place = team->t.t_first_place;
master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */
updateHWFPControl (team);
if ( root->r.r_active != master_active )
root->r.r_active = master_active;
__kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads
/* this race was fun to find. make sure the following is in the critical
* region otherwise assertions may fail occasionally since the old team
* may be reallocated and the hierarchy appears inconsistent. it is
* actually safe to run and won't cause any bugs, but will cause those
* assertion failures. it's only one deref&assign so might as well put this
* in the critical region */
master_th->th.th_team = parent_team;
master_th->th.th_team_nproc = parent_team->t.t_nproc;
master_th->th.th_team_master = parent_team->t.t_threads[0];
master_th->th.th_team_serialized = parent_team->t.t_serialized;
/* restore serialized team, if need be */
if( parent_team->t.t_serialized &&
parent_team != master_th->th.th_serial_team &&
parent_team != root->r.r_root_team ) {
__kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
master_th->th.th_serial_team = parent_team;
}
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack
KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
// Remember master's state if we re-use this nested hot team
master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
--master_th->th.th_task_state_top; // pop
// Now restore state at this level
master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];
}
// Copy the task team from the parent team to the master thread
master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
__kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );
}
// TODO: GEH - cannot do this assertion because root thread not set up as executing
// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
master_th->th.th_current_task->td_flags.executing = 1;
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
#if OMPT_SUPPORT
if (ompt_enabled) {
__kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
}
#endif
KMP_MB();
KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
}
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/* Check whether we should push an internal control record onto the
serial team stack. If so, do it. */
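/* Example (hedged, for illustration only): if the application calls
omp_set_num_threads() from inside a nested, serialized parallel region, the
current ICVs are saved here so they can be restored when that serialized
nesting level ends (see the serial_nesting_level check below). */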
void
__kmp_save_internal_controls ( kmp_info_t * thread )
{
if ( thread->th.th_team != thread->th.th_serial_team ) {
return;
}
if (thread->th.th_team->t.t_serialized > 1) {
int push = 0;
if (thread->th.th_team->t.t_control_stack_top == NULL) {
push = 1;
} else {
if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
thread->th.th_team->t.t_serialized ) {
push = 1;
}
}
if (push) { /* push a record on the serial team's stack */
kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
copy_icvs( control, & thread->th.th_current_task->td_icvs );
control->serial_nesting_level = thread->th.th_team->t.t_serialized;
control->next = thread->th.th_team->t.t_control_stack_top;
thread->th.th_team->t.t_control_stack_top = control;
}
}
}
/* Changes set_nproc */
void
__kmp_set_num_threads( int new_nth, int gtid )
{
kmp_info_t *thread;
kmp_root_t *root;
KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
KMP_DEBUG_ASSERT( __kmp_init_serial );
if (new_nth < 1)
new_nth = 1;
else if (new_nth > __kmp_max_nth)
new_nth = __kmp_max_nth;
KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
thread = __kmp_threads[gtid];
__kmp_save_internal_controls( thread );
set__nproc( thread, new_nth );
//
// If this omp_set_num_threads() call will cause the hot team size to be
// reduced (in the absence of a num_threads clause), then reduce it now,
// rather than waiting for the next parallel region.
//
root = thread->th.th_root;
if ( __kmp_init_parallel && ( ! root->r.r_active )
&& ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
&& __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
) {
kmp_team_t *hot_team = root->r.r_hot_team;
int f;
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
// Release the extra threads we don't need any more.
for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
if ( __kmp_tasking_mode != tskm_immediate_exec) {
// When decreasing team size, threads no longer in the team should unref task team.
hot_team->t.t_threads[f]->th.th_task_team = NULL;
}
__kmp_free_thread( hot_team->t.t_threads[f] );
hot_team->t.t_threads[f] = NULL;
}
hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
if( thread->th.th_hot_teams ) {
KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
thread->th.th_hot_teams[0].hot_team_nth = new_nth;
}
#endif
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
//
// Update the t_nproc field in the threads that are still active.
//
for( f=0 ; f < new_nth; f++ ) {
KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
}
// Special flag to note that the hot team was resized by an omp_set_num_threads() call
hot_team->t.t_size_changed = -1;
}
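// Example (hedged, for illustration only): an omp_set_num_threads(2) call made
// between parallel regions, while the root is inactive and the hot team still
// holds more than 2 threads, releases the surplus workers immediately in the
// block above instead of waiting for the next fork; t_size_changed == -1 then
// records that the hot team was explicitly resized.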
}
/* Changes max_active_levels */
void
__kmp_set_max_active_levels( int gtid, int max_active_levels )
{
kmp_info_t *thread;
KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );