[OMPT] Handle the initial-task-begin and initial-task-end events

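OpenMP 5.0 specifies that the initial-task-begin and initial-task-end events
are dispatched through ompt_callback_implicit_task. This patch replaces the
ompt_callback_task_create invocation for the initial task with an
ompt_callback_implicit_task invocation (ompt_scope_begin, flags
ompt_task_initial), adds the matching ompt_scope_end invocation ahead of the
thread-end callback, and updates the tests accordingly.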

Patch by Tim Cramer

Differential Revision: https://reviews.llvm.org/D58776

git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@361157 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index fcf8803..7da7f08 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -3897,11 +3897,11 @@
           ompt_thread_initial, __ompt_get_thread_data_internal());
     }
     ompt_data_t *task_data;
-    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
-    if (ompt_enabled.ompt_callback_task_create) {
-      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
-          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
-      // initial task has nothing to return to
+    ompt_data_t *parallel_data;
+    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
     }
 
     ompt_set_thread_state(root_thread, ompt_state_work_serial);
@@ -3991,6 +3991,13 @@
 #endif /* KMP_OS_WINDOWS */
 
 #if OMPT_SUPPORT
+  ompt_data_t *task_data;
+  ompt_data_t *parallel_data;
+  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
+  if (ompt_enabled.ompt_callback_implicit_task) {
+    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
+  }
   if (ompt_enabled.ompt_callback_thread_end) {
     ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
         &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
diff --git a/runtime/src/ompt-general.cpp b/runtime/src/ompt-general.cpp
index 11a195e..55a96a4 100644
--- a/runtime/src/ompt-general.cpp
+++ b/runtime/src/ompt-general.cpp
@@ -361,10 +361,11 @@
           ompt_thread_initial, __ompt_get_thread_data_internal());
     }
     ompt_data_t *task_data;
-    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
-    if (ompt_enabled.ompt_callback_task_create) {
-      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
-          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
+    ompt_data_t *parallel_data;
+    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
+    if (ompt_enabled.ompt_callback_implicit_task) {
+      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
     }
 
     ompt_set_thread_state(root_thread, ompt_state_work_serial);
diff --git a/runtime/test/ompt/callback.h b/runtime/test/ompt/callback.h
index d49c4c7..c2e1530 100755
--- a/runtime/test/ompt/callback.h
+++ b/runtime/test/ompt/callback.h
@@ -452,10 +452,29 @@
       if(task_data->ptr)
         printf("%s\n", "0: task_data initially not null");
       task_data->value = ompt_get_unique_id();
-      printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num);
+
+      // There is no parallel_begin callback for the implicit parallel region,
+      // so its parallel_data is initialized here, in the initial task.
+      if(flags & ompt_task_initial)
+      {
+        char buffer[2048];
+
+        format_task_type(flags, buffer);
+        if(parallel_data->ptr)
+          printf("%s\n", "0: parallel_data initially not null");
+        parallel_data->value = ompt_get_unique_id();
+        printf("%" PRIu64 ": ompt_event_initial_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32 ", index=%" PRIu32 ", flags=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num, flags);
+      } else {
+        printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num);
+      }
+
       break;
     case ompt_scope_end:
-      printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num);
+      if(flags & ompt_task_initial){
+        printf("%" PRIu64 ": ompt_event_initial_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num);
+      } else {
+        printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num);
+      }
       break;
   }
 }
@@ -628,17 +647,6 @@
 
   format_task_type(type, buffer);
 
-  //there is no parallel_begin callback for implicit parallel region
-  //thus it is initialized in initial task
-  if(type & ompt_task_initial)
-  {
-    ompt_data_t *parallel_data;
-    ompt_get_parallel_info(0, &parallel_data, NULL);
-    if(parallel_data->ptr)
-      printf("%s\n", "0: parallel_data initially not null");
-    parallel_data->value = ompt_get_unique_id();
-  }
-
   printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
 }
 
diff --git a/runtime/test/ompt/cancel/cancel_parallel.c b/runtime/test/ompt/cancel/cancel_parallel.c
index b03239d..9456b67 100644
--- a/runtime/test/ompt/cancel/cancel_parallel.c
+++ b/runtime/test/ompt/cancel/cancel_parallel.c
@@ -24,11 +24,11 @@
   }
 
   // Check if libomp supports the callbacks for this test.
-  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
 
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, actual_parallelism=1, index=1, flags=1 
   // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
   // CHECK-DAG: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
 
diff --git a/runtime/test/ompt/cancel/cancel_worksharing.c b/runtime/test/ompt/cancel/cancel_worksharing.c
index 9513d74..8576f96 100644
--- a/runtime/test/ompt/cancel/cancel_worksharing.c
+++ b/runtime/test/ompt/cancel/cancel_worksharing.c
@@ -51,12 +51,12 @@
 
 
   // Check if libomp supports the callbacks for this test.
-  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel'
 
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
-  
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, actual_parallelism=1, index=1, flags=1
+ 
   // cancel for and sections
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_loop|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_{{activated=18|detected=34}}, codeptr_ra={{0x[0-f]*}}
diff --git a/runtime/test/ompt/misc/interoperability.cpp b/runtime/test/ompt/misc/interoperability.cpp
index 927669e..cbb0e87 100644
--- a/runtime/test/ompt/misc/interoperability.cpp
+++ b/runtime/test/ompt/misc/interoperability.cpp
@@ -44,7 +44,6 @@
 }
 
 // Check if libomp supports the callbacks for this test.
-// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
 // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule'
 // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
 // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
@@ -57,12 +56,9 @@
 // CHECK: {{^}}[[MASTER_ID_1:[0-9]+]]: ompt_event_thread_begin:
 // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_1]]
 
-// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_task_create: parent_task_id=0
-// CHECK-SAME: parent_task_frame.exit=[[NULL]]
-// CHECK-SAME: parent_task_frame.reenter=[[NULL]]
-// CHECK-SAME: new_task_id=[[PARENT_TASK_ID_1:[0-9]+]]
-// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1
-// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}
+// CHECK-SAME: task_id=[[PARENT_TASK_ID_1:[0-9]+]], actual_parallelism=1, index=1, flags=1 
 
 // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_begin:
 // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_1]]
@@ -75,6 +71,10 @@
 // CHECK-SAME: parallel_id=[[PARALLEL_ID_1]], task_id=[[PARENT_TASK_ID_1]]
 // CHECK-SAME: invoker={{[0-9]+}}
 
+// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_initial_task_end:
+// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_1]],
+// CHECK-SAME: team_size=0, thread_num=1
+
 // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_thread_end:
 // CHECK-SAME: thread_id=[[MASTER_ID_1]]
 
@@ -82,12 +82,8 @@
 // CHECK: {{^}}[[MASTER_ID_2:[0-9]+]]: ompt_event_thread_begin:
 // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_2]]
 
-// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_task_create: parent_task_id=0
-// CHECK-SAME: parent_task_frame.exit=[[NULL]]
-// CHECK-SAME: parent_task_frame.reenter=[[NULL]]
-// CHECK-SAME: new_task_id=[[PARENT_TASK_ID_2:[0-9]+]]
-// CHECK-SAME: codeptr_ra=[[NULL]], task_type=ompt_task_initial=1
-// CHECK-SAME: has_dependences=no
+// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}
+// CHECK-SAME: task_id=[[PARENT_TASK_ID_2:[0-9]+]], actual_parallelism=1, index=1, flags=1 
 
 // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_begin:
 // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_2]]
@@ -101,12 +97,17 @@
 // CHECK-SAME: parallel_id=[[PARALLEL_ID_2]], task_id=[[PARENT_TASK_ID_2]]
 // CHECK-SAME: invoker={{[0-9]+}}
 
+// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_initial_task_end:
+// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_2]],
+// CHECK-SAME: team_size=0, thread_num=1
+
 // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_thread_end:
 // CHECK-SAME: thread_id=[[MASTER_ID_2]]
 
 // first worker thread
 // CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_thread_begin:
 // CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID_1]]
+// CHECK-NOT: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_initial_task_end:
 
 // CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_thread_end:
 // CHECK-SAME: thread_id=[[THREAD_ID_1]]
diff --git a/runtime/test/ompt/parallel/no_thread_num_clause.c b/runtime/test/ompt/parallel/no_thread_num_clause.c
index e23d89a..5583036 100644
--- a/runtime/test/ompt/parallel/no_thread_num_clause.c
+++ b/runtime/test/ompt/parallel/no_thread_num_clause.c
@@ -51,7 +51,8 @@
 
   // THREADS: 0: NULL_POINTER=[[NULL:.*$]]
   // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=281474976710658, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, actual_parallelism=1, index=1, flags=1 
+
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
 
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
diff --git a/runtime/test/ompt/tasks/serialized.c b/runtime/test/ompt/tasks/serialized.c
index b1ef45d..a2c102a 100644
--- a/runtime/test/ompt/tasks/serialized.c
+++ b/runtime/test/ompt/tasks/serialized.c
@@ -48,11 +48,9 @@
   // make sure initial data pointers are null
   // CHECK-NOT: 0: new_task_data initially not null
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create
-  // CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]]
-  // CHECK-SAME: parent_task_frame.reenter=[[NULL]]
-  // CHECK-SAME: new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]]
-  // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}
+  // CHECK-SAME: task_id={{[0-9]+}}, actual_parallelism=1, index=1, flags=1 
+
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)
   // CHECK-SAME: =[[MAIN_REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
diff --git a/runtime/test/ompt/tasks/task_types.c b/runtime/test/ompt/tasks/task_types.c
index 40ceb2d..1522635 100644
--- a/runtime/test/ompt/tasks/task_types.c
+++ b/runtime/test/ompt/tasks/task_types.c
@@ -87,14 +87,12 @@
 
   // Check if libomp supports the callbacks for this test.
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
 
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0
-  // CHECK-SAME: parent_task_frame.exit=[[NULL]]
-  // CHECK-SAME: parent_task_frame.reenter=[[NULL]]
-  // CHECK-SAME: new_task_id=[[INITIAL_TASK_ID:[0-9]+]], codeptr_ra=[[NULL]]
-  // CHECK-SAME: task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}
+  // CHECK-SAME: task_id=[[INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1, index=1, flags=1 
 
   // CHECK-NOT: 0: parallel_data initially not null
 
diff --git a/runtime/test/ompt/tasks/task_types_serialized.c b/runtime/test/ompt/tasks/task_types_serialized.c
index 7726f5b..3fe163e 100644
--- a/runtime/test/ompt/tasks/task_types_serialized.c
+++ b/runtime/test/ompt/tasks/task_types_serialized.c
@@ -83,11 +83,12 @@
 
   // Check if libomp supports the callbacks for this test.
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
 
 
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
   
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, actual_parallelism=1, index=1, flags=1
   // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1
   // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730
 
diff --git a/runtime/test/ompt/tasks/taskloop.c b/runtime/test/ompt/tasks/taskloop.c
index 59a47bf..af7f778 100644
--- a/runtime/test/ompt/tasks/taskloop.c
+++ b/runtime/test/ompt/tasks/taskloop.c
@@ -64,8 +64,7 @@
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
   // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
 
-  // TASKS: ompt_event_task_create:{{.*}} new_task_id={{[0-9]+}}
-  // TASKS-SAME: task_type=ompt_task_initial
+  // TASKS: ompt_event_initial_task_begin:{{.*}} task_id={{[0-9]+}}
   // TASKS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskloop_begin:
   // TASKS: ompt_event_task_create:{{.*}} new_task_id=[[TASK_ID1:[0-9]+]]
   // TASKS-SAME: task_type=ompt_task_explicit