[OpenMP] Directly use user's grid and block size in kernel language mode (#70612) In kernel language mode, use the user's grid and block sizes directly. No validity check is performed, which means that if the user's values are too large, the launch will fail, similar to what CUDA and HIP do right now. GitOrigin-RevId: 3768039913be32666a316a2b5c12739c423dbc61

commit: 6d6e5c1f915af26294910b0d40ec243ec21200d0 [log] [tgz]
author: Shilei Tian <i@tianshilei.me> Mon Dec 18 12:26:18 2023 -0500
committer: Copybara-Service <copybara-worker@google.com> Mon Dec 18 09:28:57 2023 -0800
tree: 3f21e74215b6dba4fccb0946049eb36cf3b49e87
parent: 2b0a0c93041096d3b9dc7008e3c7b9cba1bec889 [diff]
diff --git a/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 716b0ad..28484ae 100644
--- a/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/libomptarget/plugins-nextgen/common/include/PluginInterface.h

@@ -397,6 +397,9 @@
 
   /// The prototype kernel launch environment.
   KernelLaunchEnvironmentTy KernelLaunchEnvironment;
+
+  /// Whether the kernel is a bare kernel.
+  bool IsBareKernel = false;
 };
 
 /// Class representing a map of host pinned allocations. We track these pinned

diff --git a/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 1d96468..1c9777d 100644
--- a/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

@@ -436,6 +436,7 @@
        Name, ErrStr.data());
     assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
            "Default initialization failed.");
+    IsBareKernel = true;
   }
 
   // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -594,6 +595,10 @@
                                         uint32_t ThreadLimitClause[3]) const {
   assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
+
+  if (IsBareKernel && ThreadLimitClause[0] > 0)
+    return ThreadLimitClause[0];
+
   if (ThreadLimitClause[0] > 0 && isGenericMode())
     ThreadLimitClause[0] += GenericDevice.getWarpSize();
 
@@ -610,6 +615,9 @@
   assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
          "Multi dimensional launch not supported yet.");
 
+  if (IsBareKernel && NumTeamsClause[0] > 0)
+    return NumTeamsClause[0];
+
   if (NumTeamsClause[0] > 0) {
     // TODO: We need to honor any value and consequently allow more than the
     // block limit. For this we might need to start multiple kernels or let the

diff --git a/libomptarget/test/offloading/ompx_bare.c b/libomptarget/test/offloading/ompx_bare.c
new file mode 100644
index 0000000..fb3810b
--- /dev/null
+++ b/libomptarget/test/offloading/ompx_bare.c

@@ -0,0 +1,38 @@
+// RUN: %libomptarget-compile-generic
+// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
+//
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+
+#include <assert.h>
+#include <ompx.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Launches a bare target region with 64 teams and a thread limit of 64, then
+// verifies on the host that every element of the mapped buffer holds its own
+// index. Each device thread computes idx = block id * block dim + thread id
+// and stores idx into data[idx].
+//
+// Returns 0 and prints "PASS" on success; prints a "FAIL" message and returns
+// EXIT_FAILURE if the buffer cannot be allocated or any element is wrong.
+int main(int argc, char *argv[]) {
+  const int num_blocks = 64;
+  const int block_size = 64;
+  const int N = num_blocks * block_size;
+  int *data = malloc(N * sizeof *data);
+  if (!data) {
+    printf("FAIL: could not allocate %d ints\n", N);
+    return EXIT_FAILURE;
+  }
+
+  // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with 64 blocks and 64 threads in SPMD mode
+
+#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
+  {
+    int bid = ompx_block_id_x();
+    int bdim = ompx_block_dim_x();
+    int tid = ompx_thread_id_x();
+    int idx = bid * bdim + tid;
+    data[idx] = idx;
+  }
+
+  // Verify with an explicit comparison rather than assert() so the result is
+  // still validated when the test is compiled with NDEBUG defined.
+  for (int i = 0; i < N; ++i) {
+    if (data[i] != i) {
+      printf("FAIL: data[%d] = %d\n", i, data[i]);
+      free(data);
+      return EXIT_FAILURE;
+    }
+  }
+
+  free(data);
+
+  // CHECK: PASS
+  printf("PASS\n");
+
+  return 0;
+}
commit	6d6e5c1f915af26294910b0d40ec243ec21200d0	[log] [tgz]
author	Shilei Tian <i@tianshilei.me>	Mon Dec 18 12:26:18 2023 -0500
committer	Copybara-Service <copybara-worker@google.com>	Mon Dec 18 09:28:57 2023 -0800
tree	3f21e74215b6dba4fccb0946049eb36cf3b49e87
parent	2b0a0c93041096d3b9dc7008e3c7b9cba1bec889 [diff]