[OpenMP] libomp: add check of task function pointer for NULL.

This patch simplifies the compiler's implementation of the "taskwait nowait"
construct, which is semantically equivalent to an empty task. Instead of
creating an empty routine as the task entry, the compiler can just pass a
NULL pointer to the runtime. The runtime then does all the work with the
dependences and returns, because there is no task routine to run.

Differential Revision: https://reviews.llvm.org/D112015

GitOrigin-RevId: 63f8099e233c37bae1dcf2629ec62db8de6cc356
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 32e85a6..d956df1 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -1621,13 +1621,15 @@
     KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
 #endif
 
+    if (task->routine != NULL) {
 #ifdef KMP_GOMP_COMPAT
-    if (taskdata->td_flags.native) {
-      ((void (*)(void *))(*(task->routine)))(task->shareds);
-    } else
+      if (taskdata->td_flags.native) {
+        ((void (*)(void *))(*(task->routine)))(task->shareds);
+      } else
 #endif /* KMP_GOMP_COMPAT */
-    {
-      (*(task->routine))(gtid, task);
+      {
+        (*(task->routine))(gtid, task);
+      }
     }
     KMP_POP_PARTITIONED_TIMER();
 
diff --git a/runtime/test/tasking/kmp_taskwait_nowait.c b/runtime/test/tasking/kmp_taskwait_nowait.c
new file mode 100644
index 0000000..809642a
--- /dev/null
+++ b/runtime/test/tasking/kmp_taskwait_nowait.c
@@ -0,0 +1,86 @@
+// RUN: %libomp-compile-and-run
+
+// test checks IN dep kind in depend clause on taskwait nowait
+// uses codegen emulation
+// Note: no outlined task routine used
+#include <stdio.h>
+#include <omp.h>
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+#define TIED 1
+typedef struct DEP { // one dependence entry, passed in an array to __kmpc_omp_task_with_deps
+  size_t addr; // address of the variable the dependence is on
+  size_t len; // never assigned anywhere in this test
+  int flags; // dependence kind: 1-in, 2-out, 3-inout, 4-mtx, 8-inoutset
+} _dep;
+typedef struct ID { // descriptor passed as the `loc` argument of the __kmpc_* calls
+  int reserved_1;
+  int flags; // main() initializes this to 2; the meaning of the value is not shown in this file
+  int reserved_2;
+  int reserved_3;
+  char *psource; // semicolon-separated source string (main() uses one naming test.c, func, 67, 0)
+} _id;
+typedef struct task { // task object as seen by the emulated codegen; returned by __kmpc_omp_task_alloc
+  void** shareds;
+  void* entry; // presumably the slot for the task routine -- TODO confirm against the runtime's task layout
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+typedef int(*entry_t)(int, task_t*); // type of the `rtn` argument of __kmpc_omp_task_alloc
+
+#ifdef __cplusplus
+extern "C" { // hand-written prototypes of the runtime entry points this test calls
+#endif
+extern int __kmpc_global_thread_num(_id*); // result is used as the gtid argument below
+task_t *__kmpc_omp_task_alloc(_id *loc, int gtid, int flags,
+                              size_t sz, size_t shar, entry_t rtn); // main() passes rtn == NULL
+int __kmpc_omp_task_with_deps(_id *loc, int gtid, task_t *task, int ndeps,
+                              _dep *dep_lst, int nd_noalias, _dep *noalias_l); // return value is ignored by main()
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+int main() // emulates "taskwait depend(in: i1, i2) nowait" by submitting a task whose routine is NULL
+{
+  int i1,i2,i3; // never read or written; they serve only as dependence objects
+  omp_set_num_threads(2);
+  printf("addresses: %p %p %p\n", &i1, &i2, &i3);
+  #pragma omp parallel
+  {
+    int t = omp_get_thread_num(); // thread number in the parallel region; reported as the task creator
+    printf("thread %d enters parallel\n", t);
+    #pragma omp single
+    {
+      #pragma omp task depend(in: i3)
+      {
+        int th = omp_get_thread_num(); // thread that actually executes the task
+        printf("task 0 created by th %d, executed by th %d\n", t, th);
+      }
+      #pragma omp task depend(in: i2)
+      {
+        int th = omp_get_thread_num(); // thread that actually executes the task
+        printf("task 1 created by th %d, executed by th %d\n", t, th);
+      }
+//      #pragma omp taskwait depend(in: i1, i2) nowait
+      {
+        _dep sdep[2]; // only addr and flags of each entry are filled in below
+        static _id loc = {0, 2, 0, 0, ";test.c;func;67;0;;"};
+        int gtid = __kmpc_global_thread_num(&loc);
+// instead of creating an empty task function we can now send NULL to runtime
+        task_t *ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED,
+                                            sizeof(task_t), 0, NULL); // shar == 0, rtn == NULL
+        sdep[0].addr = (size_t)&i2; // same object as task 1's depend(in: i2)
+        sdep[0].flags = 1; // 1-in, 2-out, 3-inout, 4-mtx, 8-inoutset
+        sdep[1].addr = (size_t)&i1; // no task above names i1 in a depend clause
+        sdep[1].flags = 1; // in
+        __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, NULL); // ndeps == 2, no noalias entries
+      }
+      printf("single done\n");
+    }
+  }
+  printf("passed\n"); // reached only if the runtime did not crash on the NULL routine
+  return 0;
+}