Create a runtime option to disable task throttling.

Patch by viroulep (Philippe Virouleau)

Differential Revision: https://reviews.llvm.org/D63196


git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@364934 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index e9b343c..38a16d1 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -2121,6 +2121,7 @@
 extern kmp_tasking_mode_t
     __kmp_tasking_mode; /* determines how/when to execute tasks */
 extern int __kmp_task_stealing_constraint;
+extern int __kmp_enable_task_throttling;
 #if OMP_40_ENABLED
 extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
 // specified, defaults to 0 otherwise
diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp
index f766516..1cfb62d 100644
--- a/runtime/src/kmp_global.cpp
+++ b/runtime/src/kmp_global.cpp
@@ -341,6 +341,7 @@
 KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
 
 int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
+int __kmp_enable_task_throttling = 1;
 
 #ifdef DEBUG_SUSPEND
 int __kmp_suspend_count = 0;
diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp
index 1afba5b..f5781d4 100644
--- a/runtime/src/kmp_settings.cpp
+++ b/runtime/src/kmp_settings.cpp
@@ -4683,6 +4683,20 @@
 #endif /* USE_ITT_BUILD */
 
 // -----------------------------------------------------------------------------
+// KMP_ENABLE_TASK_THROTTLING
+
+static void __kmp_stg_parse_task_throttling(char const *name,
+                                            char const *value, void *data) {
+  __kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling);
+} // __kmp_stg_parse_task_throttling
+
+
+static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
+                                            char const *name, void *data) {
+  __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
+} // __kmp_stg_print_task_throttling
+
+// -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV
 
 #if OMP_40_ENABLED
@@ -5003,6 +5017,8 @@
     {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,
      __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0},
 #endif
+    {"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling,
+     __kmp_stg_print_task_throttling, NULL, 0, 0},
 
 #if OMP_40_ENABLED
     {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env,
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 62f5f7a..23ace2f 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -374,7 +374,8 @@
   // Check if deque is full
   if (TCR_4(thread_data->td.td_deque_ntasks) >=
       TASK_DEQUE_SIZE(thread_data->td)) {
-    if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
+    if (__kmp_enable_task_throttling &&
+        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                               thread->th.th_current_task)) {
       KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                     "TASK_NOT_PUSHED for task %p\n",
@@ -394,7 +395,8 @@
     // Need to recheck as we can get a proxy task from thread outside of OpenMP
     if (TCR_4(thread_data->td.td_deque_ntasks) >=
         TASK_DEQUE_SIZE(thread_data->td)) {
-      if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
+      if (__kmp_enable_task_throttling &&
+          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                 thread->th.th_current_task)) {
         __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
         KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
diff --git a/runtime/test/tasking/omp_fill_taskqueue.c b/runtime/test/tasking/omp_fill_taskqueue.c
new file mode 100644
index 0000000..e95f97a
--- /dev/null
+++ b/runtime/test/tasking/omp_fill_taskqueue.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=0 %libomp-run
+// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=1 %libomp-run
+
+#include<omp.h>
+#include<stdlib.h>
+#include<string.h>
+
+/**
+ * Test the task throttling behavior of the runtime.
+ * Unless OMP_NUM_THREADS is 1, the master thread pushes tasks to its own tasks
+ * queue until either of the following happens:
+ *   - the task queue is full, and it starts serializing tasks
+ *   - all tasks have been pushed, and it can begin execution
+ * The idea is to create a huge number of tasks which execution are blocked
+ * until the master thread comes to execute tasks (they need to be blocking,
+ * otherwise the second thread will start emptying the queue).
+ * At this point we can check the number of enqueued tasks: iff all tasks have
+ * been enqueued, then there was no task throttling.
+ * Otherwise there has been some sort of task throttling.
+ * If what we detect doesn't match the value of the environment variable, the
+ * test is failed.
+ */
+
+
+#define NUM_TASKS 2000
+
+
+int main()
+{
+  int i;
+  int block = 1;
+  int tid;
+  int throttling = strcmp(getenv("KMP_ENABLE_TASK_THROTTLING"), "1") == 0;
+  int enqueued = 0;
+  int failed = -1;
+
+  #pragma omp parallel num_threads(2)
+  #pragma omp master
+  {
+    for (i = 0; i < NUM_TASKS; i++) {
+      enqueued++;
+      #pragma omp task
+      {
+        tid = omp_get_thread_num();
+        if (tid == 0) {
+          // As soon as the master thread starts executing task we should unlock
+          // all tasks, and detect the test failure if it has not been done yet.
+          if (failed < 0)
+            failed = throttling ? enqueued == NUM_TASKS : enqueued < NUM_TASKS;
+          block = 0;
+        }
+        while (block)
+          ;
+      }
+    }
+    block = 0;
+  }
+
+  return failed;
+}