[OpenMP 5.0] Deprecate nest-var and associated features

Nest-var, OMP_NESTED, omp_set_nested()., and omp_get_nested() have been
deprecated in the 5.0 spec. Initial nesting info is now derived from
OMP_MAX_ACTIVE_LEVELS, OMP_NUM_THREADS, and OMP_PROC_BIND.

This patch deprecates the internal ICV that corresponds to nest-var, and
replaces it with the max-active-levels-var ICV to determine nesting. The
change still allows for use of OMP_NESTED (according to 5.0 changes),
omp_get_nested, and omp_set_nested, which have had deprecation messages
added to them. The change allows certain settings of OMP_NUM_THREADS,
OMP_PROC_BIND, and OMP_MAX_ACTIVE_LEVELS to turn on nesting, but
OMP_NESTED=0 will still force nesting to be off.

The runtime now prints informative messages about deprecation of
OMP_NESTED, omp_set_nested(), and omp_get_nested(), when those
environment variables or routines are used. It also prints deprecated
message in output for KMP_SETTINGS and OMP_DISPLAY_ENV for OMP_NESTED.
This patch also fixes OMP_DISPLAY_ENV output for OMP_TARGET_OFFLOAD.

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D58408


git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@355138 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/runtime/src/i18n/en_US.txt b/runtime/src/i18n/en_US.txt
index cf262d9..822f73c 100644
--- a/runtime/src/i18n/en_US.txt
+++ b/runtime/src/i18n/en_US.txt
@@ -425,7 +425,7 @@
 AffHWSubsetManyProcs         "KMP_HW_SUBSET ignored: too many Procs requested."
 HierSchedInvalid             "Hierarchy ignored: unsupported level: %1$s."
 AffFormatDefault             "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}"
-
+APIDeprecated                "%1$s routine deprecated, please use %2$s instead."
 
 # --------------------------------------------------------------------------------------------------
 -*- HINTS -*-
@@ -489,4 +489,3 @@
 # --------------------------------------------------------------------------------------------------
 # end of file #
 # --------------------------------------------------------------------------------------------------
-
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index 5125eca..93fcf0f 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -1847,7 +1847,6 @@
 typedef struct kmp_internal_control {
   int serial_nesting_level; /* corresponds to the value of the
                                th_team_serialized field */
-  kmp_int8 nested; /* internal control for nested parallelism (per thread) */
   kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
                        thread) */
   kmp_int8
@@ -2054,8 +2053,6 @@
   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
 #endif
 
-#define get__nested_2(xteam, xtid)                                             \
-  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
 #define get__dynamic_2(xteam, xtid)                                            \
   ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
 #define get__nproc_2(xteam, xtid)                                              \
@@ -2076,11 +2073,6 @@
 #define set__bt_set_team(xteam, xtid, xval)                                    \
   (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
 
-#define set__nested(xthread, xval)                                             \
-  (((xthread)->th.th_current_task->td_icvs.nested) = (xval))
-#define get__nested(xthread)                                                   \
-  (((xthread)->th.th_current_task->td_icvs.nested) ? (FTN_TRUE) : (FTN_FALSE))
-
 #define set__dynamic(xthread, xval)                                            \
   (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
 #define get__dynamic(xthread)                                                  \
@@ -2095,6 +2087,9 @@
 #define set__max_active_levels(xthread, xval)                                  \
   (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
 
+#define get__max_active_levels(xthread)                                        \
+  ((xthread)->th.th_current_task->td_icvs.max_active_levels)
+
 #define set__sched(xthread, xval)                                              \
   (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
 
@@ -2821,8 +2816,6 @@
   // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
   // the synch overhead or keeping r_active
   volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
-  // GEH: This is misnamed, should be r_in_parallel
-  volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
   // keeps a count of active parallel regions per root
   std::atomic<int> r_in_parallel;
   // GEH: This is misnamed, should be r_active_levels
@@ -3013,8 +3006,6 @@
                                  used (fixed) */
 extern int __kmp_tp_cached; /* whether threadprivate cache has been created
                                (__kmpc_threadprivate_cached()) */
-extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la
-                                 OMP_NESTED */
 extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
                                     blocking (env setting) */
 #if KMP_USE_MONITOR
@@ -3056,9 +3047,12 @@
 extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested
-                                            parallelism enabled by default via
-                                            OMP_MAX_ACTIVE_LEVELS */
+// max_active_levels for nested parallelism enabled by default via
+// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
+extern int __kmp_dflt_max_active_levels;
+// Indicates whether value of __kmp_dflt_max_active_levels was already
+// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
+extern bool __kmp_dflt_max_active_levels_set;
 extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
                                           concurrent execution per team */
 #if KMP_NESTED_HOT_TEAMS
diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp
index 592a266..4cc8757 100644
--- a/runtime/src/kmp_csupport.cpp
+++ b/runtime/src/kmp_csupport.cpp
@@ -1872,7 +1872,7 @@
 
   __kmp_save_internal_controls(thread);
 
-  set__nested(thread, flag ? TRUE : FALSE);
+  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
 }
 
 void ompc_set_max_active_levels(int max_active_levels) {
diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h
index 6fd089f..9eb80b4 100644
--- a/runtime/src/kmp_ftn_entry.h
+++ b/runtime/src/kmp_ftn_entry.h
@@ -595,6 +595,7 @@
 }
 
 void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NESTED)(int KMP_DEREF flag) {
+  KMP_INFORM(APIDeprecated, "omp_set_nested", "omp_set_max_active_levels");
 #ifdef KMP_STUB
   __kmps_set_nested(KMP_DEREF flag);
 #else
@@ -602,17 +603,22 @@
   /* For the thread-private internal controls implementation */
   thread = __kmp_entry_thread();
   __kmp_save_internal_controls(thread);
-  set__nested(thread, ((KMP_DEREF flag) ? TRUE : FALSE));
+  // Somewhat arbitrarily decide where to get a value for max_active_levels
+  int max_active_levels = get__max_active_levels(thread);
+  if (max_active_levels == 1)
+    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
+  set__max_active_levels(thread, (KMP_DEREF flag) ? max_active_levels : 1);
 #endif
 }
 
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NESTED)(void) {
+  KMP_INFORM(APIDeprecated, "omp_get_nested", "omp_get_max_active_levels");
 #ifdef KMP_STUB
   return __kmps_get_nested();
 #else
   kmp_info_t *thread;
   thread = __kmp_entry_thread();
-  return get__nested(thread);
+  return get__max_active_levels(thread) > 1;
 #endif
 }
 
diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp
index 627724c..1805ade 100644
--- a/runtime/src/kmp_global.cpp
+++ b/runtime/src/kmp_global.cpp
@@ -127,10 +127,9 @@
 int __kmp_dflt_team_nth_ub = 0;
 int __kmp_tp_capacity = 0;
 int __kmp_tp_cached = 0;
-int __kmp_dflt_nested = FALSE;
 int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF;
-int __kmp_dflt_max_active_levels =
-    KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */
+int __kmp_dflt_max_active_levels = 1; // Nesting off by default
+bool __kmp_dflt_max_active_levels_set = false; // Don't override set value
 #if KMP_NESTED_HOT_TEAMS
 int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */
 /* 1 - keep extra threads when reduced */
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 2a9e31d..e6b673b 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -1719,11 +1719,11 @@
       // parallel out of teams construct). This code moved here from
       // __kmp_reserve_threads() to speedup nested serialized parallels.
       if (nthreads > 1) {
-        if ((!get__nested(master_th) && (root->r.r_in_parallel
+        if ((get__max_active_levels(master_th) == 1 && (root->r.r_in_parallel
 #if OMP_40_ENABLED
-                                         && !enter_teams
+                                                        && !enter_teams
 #endif /* OMP_40_ENABLED */
-                                         )) ||
+                                                        )) ||
             (__kmp_library == library_serial)) {
           KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                         " threads\n",
@@ -3150,8 +3150,6 @@
 
   kmp_internal_control_t g_icvs = {
     0, // int serial_nesting_level; //corresponds to value of th_team_serialized
-    (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
-    // for nested parallelism (per thread)
     (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
     // adjustment of threads (per thread)
     (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
@@ -3207,7 +3205,6 @@
   root->r.r_active = FALSE;
   root->r.r_in_parallel = 0;
   root->r.r_blocktime = __kmp_dflt_blocktime;
-  root->r.r_nested = __kmp_dflt_nested;
 
   /* setup the root team for this task */
   /* allocate the root team structure */
@@ -3432,7 +3429,6 @@
         __kmp_print_structure_thread("    Uber Thread:  ",
                                      root->r.r_uber_thread);
         __kmp_printf("    Active?:      %2d\n", root->r.r_active);
-        __kmp_printf("    Nested?:      %2d\n", root->r.r_nested);
         __kmp_printf("    In Parallel:  %2d\n",
                      KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
         __kmp_printf("\n");
diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp
index 68e36f5..b61df06 100644
--- a/runtime/src/kmp_settings.cpp
+++ b/runtime/src/kmp_settings.cpp
@@ -975,12 +975,27 @@
 
 static void __kmp_stg_parse_nested(char const *name, char const *value,
                                    void *data) {
-  __kmp_stg_parse_bool(name, value, &__kmp_dflt_nested);
+  int nested;
+  KMP_INFORM(EnvVarDeprecated, name, "OMP_MAX_ACTIVE_LEVELS");
+  __kmp_stg_parse_bool(name, value, &nested);
+  if (nested) {
+    if (!__kmp_dflt_max_active_levels_set)
+      __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
+  } else { // nesting explicitly turned off
+    __kmp_dflt_max_active_levels = 1;
+    __kmp_dflt_max_active_levels_set = true;
+  }
 } // __kmp_stg_parse_nested
 
 static void __kmp_stg_print_nested(kmp_str_buf_t *buffer, char const *name,
                                    void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_dflt_nested);
+  if (__kmp_env_format) {
+    KMP_STR_BUF_PRINT_NAME;
+  } else {
+    __kmp_str_buf_print(buffer, "   %s", name);
+  }
+  __kmp_str_buf_print(buffer, ": deprecated; max-active-levels-var=%d\n",
+                      __kmp_dflt_max_active_levels);
 } // __kmp_stg_print_nested
 
 static void __kmp_parse_nested_num_threads(const char *var, const char *env,
@@ -1026,6 +1041,8 @@
       }
     }
   }
+  if (!__kmp_dflt_max_active_levels_set && total > 1)
+    __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
   KMP_DEBUG_ASSERT(total > 0);
   if (total <= 0) {
     KMP_WARNING(NthSyntaxError, var, env);
@@ -1182,8 +1199,22 @@
 
 static void __kmp_stg_parse_max_active_levels(char const *name,
                                               char const *value, void *data) {
-  __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT,
-                      &__kmp_dflt_max_active_levels);
+  kmp_uint64 tmp_dflt = 0;
+  char const *msg = NULL;
+  if (!__kmp_dflt_max_active_levels_set) {
+    // Don't overwrite __kmp_dflt_max_active_levels if we get an invalid setting
+    __kmp_str_to_uint(value, &tmp_dflt, &msg);
+    if (msg != NULL) { // invalid setting; print warning and ignore
+      KMP_WARNING(ParseSizeIntWarn, name, value, msg);
+    } else if (tmp_dflt > KMP_MAX_ACTIVE_LEVELS_LIMIT) {
+      // invalid setting; print warning and ignore
+      msg = KMP_I18N_STR(ValueTooLarge);
+      KMP_WARNING(ParseSizeIntWarn, name, value, msg);
+    } else { // valid setting
+      __kmp_dflt_max_active_levels = tmp_dflt;
+      __kmp_dflt_max_active_levels_set = true;
+    }
+  }
 } // __kmp_stg_parse_max_active_levels
 
 static void __kmp_stg_print_max_active_levels(kmp_str_buf_t *buffer,
@@ -1240,9 +1271,13 @@
     value = "MANDATORY";
   else if (__kmp_target_offload == tgt_disabled)
     value = "DISABLED";
-  if (value) {
-    __kmp_str_buf_print(buffer, "   %s=%s\n", name, value);
+  KMP_DEBUG_ASSERT(value);
+  if (__kmp_env_format) {
+    KMP_STR_BUF_PRINT_NAME;
+  } else {
+    __kmp_str_buf_print(buffer, "   %s", name);
   }
+  __kmp_str_buf_print(buffer, "=%s\n", value);
 } // __kmp_stg_print_target_offload
 #endif
 
@@ -3162,6 +3197,9 @@
     }
     __kmp_nested_proc_bind.used = nelem;
 
+    if (nelem > 1 && !__kmp_dflt_max_active_levels_set)
+      __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
+
     // Save values in the nested proc_bind array
     int i = 0;
     for (;;) {
@@ -5248,7 +5286,7 @@
   /* OMP_NESTED */
   value = __kmp_env_blk_var(block, "OMP_NESTED");
   if (value) {
-    ompc_set_nested(__kmp_dflt_nested);
+    ompc_set_nested(__kmp_dflt_max_active_levels > 1);
   }
 
   /* OMP_DYNAMIC */