[OpenMP][OMPT][clang] task frame support fixed in __kmpc_fork_call

__kmp_fork_call sets the enter_frame of the active task (th_curren_task)
before new parallel region begins. After the region is finished, the
enter_frame is cleared.

The old implementation of __kmpc_fork_call didn’t clear the enter_frame of
active task.

Also, the way of initializing the enter_frame of the active task was wrong.
Consider the following two OpenMP programs.

The first program: Let R1 be the serialized parallel region that encloses
another serialized parallel region R2. Assume that thread that executes R2 is
going to create a new serialized parallel region R3 by executing
__kmpc_fork_call. This thread is responsible to set enter_frame of R2's
implicit task. Note that the information about R2's implicit task is present
inside master_th->th.th_current_task at this moment, while lwt represents the
information about R1's implicit task. The old implementation uses lwt and
resets enter_frame of R1's implicit task instead of R2's implicit task. The
new implementation uses master_th->th.th_current_task instead.

The second program: Consider the OpenMP program that contains parallel region
R1 which encloses an explicit task T. Assume that thread should create another
parallel region R2 during the execution of the T. The __kmpc_fork_call is
responsible to create R2 and set enter frame of T whose information is present
inside the master_th->th.th_current_task.
Old implementation tries to set the frame of
parent_team->t.t_implicit_task_taskdata[tid] which corresponds to the implicit
task of the R1, instead of T.

Differential Revision: https://reviews.llvm.org/D112419

GitOrigin-RevId: f2410bfb1c49b6610a8a96dfb22b20d5b478ea17
diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp
index b8cd675..e95c2f0 100644
--- a/runtime/src/kmp_csupport.cpp
+++ b/runtime/src/kmp_csupport.cpp
@@ -288,15 +288,7 @@
     ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       kmp_info_t *master_th = __kmp_threads[gtid];
-      kmp_team_t *parent_team = master_th->th.th_team;
-      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
-      if (lwt)
-        ompt_frame = &(lwt->ompt_task_info.frame);
-      else {
-        int tid = __kmp_tid_from_gtid(gtid);
-        ompt_frame = &(
-            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
-      }
+      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
     OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -320,6 +312,12 @@
     );
 
     va_end(ap);
+
+#if OMPT_SUPPORT
+    if (ompt_enabled.enabled) {
+      ompt_frame->enter_frame = ompt_data_none;
+    }
+#endif
   }
 
 #if KMP_STATS_ENABLED
diff --git a/runtime/test/ompt/parallel/nested_serialized_task_frames.c b/runtime/test/ompt/parallel/nested_serialized_task_frames.c
new file mode 100644
index 0000000..c34f355
--- /dev/null
+++ b/runtime/test/ompt/parallel/nested_serialized_task_frames.c
@@ -0,0 +1,81 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+#pragma omp parallel num_threads(1)
+  {
+    // region 0
+#pragma omp parallel num_threads(1)
+    {
+      // region 1
+#pragma omp parallel num_threads(1)
+      {
+        // region 2
+        // region 2's implicit task
+        print_ids(0);
+        // region 1's implicit task
+        print_ids(1);
+        // region 0's implicit task
+        print_ids(2);
+        // initial task
+        print_ids(3);
+      }
+    }
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id=[[INITIAL_PARALLEL_ID:[0-9]+]], task_id=[[INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1, index=1, flags=1
+
+  // region 0
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[INITIAL_TASK_FRAME_ENTER:0x[0-f]+]],
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID_0:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_0]], task_id=[[TASK_ID_0:[0-9]+]]
+
+  // region 1
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: parent_task_frame.exit=[[REGION_0_FRAME_EXIT:0x[0-f]+]], parent_task_frame.reenter=[[REGION_0_FRAME_ENTER:0x[0-f]+]],
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID_1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_1:[0-9]+]]
+
+  // region 2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: parent_task_frame.exit=[[REGION_1_FRAME_EXIT:0x[0-f]+]], parent_task_frame.reenter=[[REGION_1_FRAME_ENTER:0x[0-f]+]],
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID_2:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_2]], task_id=[[TASK_ID_2:[0-9]+]]
+
+  // region 2's implicit task information (exit frame should be set, while enter should be NULL)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID_2]], task_id=[[TASK_ID_2]]
+  // CHECK-SAME: exit_frame={{0x[0-f]+}}
+  // CHECK-SAME: reenter_frame=[[NULL]]
+  // CHECK-SAME: task_type=ompt_task_implicit
+
+  // region 1's implicit task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_1]]
+  // CHECK-SAME: exit_frame=[[REGION_1_FRAME_EXIT]]
+  // CHECK-SAME: reenter_frame=[[REGION_1_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_implicit
+
+  // region 0's implicit task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID_0]], task_id=[[TASK_ID_0]]
+  // CHECK-SAME: exit_frame=[[REGION_0_FRAME_EXIT]]
+  // CHECK-SAME: reenter_frame=[[REGION_0_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_implicit
+
+  // region 0's initial task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[INITIAL_PARALLEL_ID]], task_id=[[INITIAL_TASK_ID]]
+  // CHECK-SAME: exit_frame=[[NULL]]
+  // CHECK-SAME: reenter_frame=[[INITIAL_TASK_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_initial
+
+  return 0;
+}
\ No newline at end of file
diff --git a/runtime/test/ompt/parallel/region_in_expl_task_task_frames.c b/runtime/test/ompt/parallel/region_in_expl_task_task_frames.c
new file mode 100644
index 0000000..27a7e5b
--- /dev/null
+++ b/runtime/test/ompt/parallel/region_in_expl_task_task_frames.c
@@ -0,0 +1,87 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+#pragma omp parallel num_threads(2)
+  {
+    if (omp_get_thread_num() == 0) {
+      // region 0
+#pragma omp task if(0)
+      {
+        // explicit task immediately executed by the initial master thread
+#pragma omp parallel num_threads(2)
+        {
+          if (omp_get_thread_num() == 0) {
+            // Note that this is executed by the initial master thread
+            // region 1
+            // region 1's implicit task
+            print_ids(0);
+            // explicit task
+            print_ids(1);
+            // region 0's implicit task
+            print_ids(2);
+            // initial task
+            print_ids(3);
+          }
+        }
+      }
+    }
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id=[[INITIAL_PARALLEL_ID:[0-9]+]], task_id=[[INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1, index=1, flags=1
+
+  // region 0
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[INITIAL_TASK_FRAME_ENTER:0x[0-f]+]],
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID_0:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_0]], task_id=[[TASK_ID_0:[0-9]+]]
+
+  // explicit task
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[TASK_ID_0]]
+  // CHECK-SAME: parent_task_frame.exit=[[REGION_0_FRAME_EXIT:0x[0-f]+]]
+  // CHECK-SAME: parent_task_frame.reenter=[[REGION_0_FRAME_ENTER:0x[0-f]+]]
+  // CHECK-SAME: new_task_id=[[TASK_ID_1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID_0]], second_task_id=[[TASK_ID_1]]
+
+  // region 1
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
+  // CHECK-SAME: parent_task_frame.exit=[[EXPLICIT_TASK_FRAME_EXIT:0x[0-f]+]], parent_task_frame.reenter=[[EXPLICIT_TASK_FRAME_ENTER:0x[0-f]+]],
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID_1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_2:[0-9]+]]
+
+  // region 1's implicit task information (exit frame should be set, while enter should be NULL)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_2]]
+  // CHECK-SAME: exit_frame={{0x[0-f]+}}
+  // CHECK-SAME: reenter_frame=[[NULL]]
+  // CHECK-SAME: task_type=ompt_task_implicit
+
+  // explicit task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID_0]], task_id=[[TASK_ID_1]]
+  // CHECK-SAME: exit_frame=[[EXPLICIT_TASK_FRAME_EXIT]]
+  // CHECK-SAME: reenter_frame=[[EXPLICIT_TASK_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_explicit
+
+  // region 0's implicit task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID_0]], task_id=[[TASK_ID_0]]
+  // CHECK-SAME: exit_frame=[[REGION_0_FRAME_EXIT]]
+  // CHECK-SAME: reenter_frame=[[REGION_0_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_implicit
+
+  // region 0's initial task information (both exit and enter frames should be set)
+  // CHECK: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[INITIAL_PARALLEL_ID]], task_id=[[INITIAL_TASK_ID]]
+  // CHECK-SAME: exit_frame=[[NULL]]
+  // CHECK-SAME: reenter_frame=[[INITIAL_TASK_FRAME_ENTER]]
+  // CHECK-SAME: task_type=ompt_task_initial
+
+  return 0;
+}
\ No newline at end of file