[OPENMP][NVPTX]Improved omp_get_thread_limit() function.
Summary:
In SPMD mode, omp_get_thread_limit() now returns the number of threads in
the block, which is the maximum number of threads available, instead of
reading the limit from the task descriptor.
Reviewers: grokos, gtbercea, kkwli0
Subscribers: guansong, jdoerfert, openmp-commits, caomhin
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D61378
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@359790 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index c3877a8..3a0c39c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -73,14 +73,11 @@
}
EXTERN int omp_get_thread_limit(void) {
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
- "Expected SPMD mode only with uninitialized runtime.");
- return 0; // default is 0
- }
+ if (isSPMDMode())
+ return GetNumberOfThreadsInBlock();
// per contention group.. meaning threads in current team
omptarget_nvptx_TaskDescr *currTaskDescr =
- getMyTopTaskDescriptor(isSPMDMode());
+ getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
int rc = currTaskDescr->ThreadLimit();
PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc);
return rc;
diff --git a/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c b/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
new file mode 100644
index 0000000..626d620
--- /dev/null
+++ b/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
@@ -0,0 +1,72 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ int ThreadLimitL0 = -1, ThreadLimitL1 = -1, ThreadLimitL2 = -1;
+
+#pragma omp declare reduction(unique64:int \
+ : omp_out = (omp_in == 64 ? omp_in : omp_out)) \
+ initializer(omp_priv = -1)
+#pragma omp declare reduction(unique32:int \
+ : omp_out = (omp_in == 32 ? omp_in : omp_out)) \
+ initializer(omp_priv = -1)
+
+ // Non-SPMD mode.
+#pragma omp target teams map(ThreadLimitL0, ThreadLimitL1, ThreadLimitL2) \
+ thread_limit(64) num_teams(1)
+ {
+ ThreadLimitL0 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 \
+ : ThreadLimitL1, ThreadLimitL2) num_threads(32)
+ {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+ }
+
+ // CHECK: Non-SPMD ThreadLimitL0 = 64
+ printf("Non-SPMD ThreadLimitL0 = %d\n", ThreadLimitL0);
+ // CHECK: Non-SPMD ThreadLimitL1 = 64
+ printf("Non-SPMD ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: Non-SPMD ThreadLimitL2 = 64
+ printf("Non-SPMD ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ // SPMD mode with full runtime (target parallel construct).
+ ThreadLimitL1 = -1;
+ ThreadLimitL2 = -1;
+#pragma omp target parallel reduction(unique32 \
+ : ThreadLimitL1, ThreadLimitL2) \
+ num_threads(32)
+ {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+
+ // CHECK: SPMD with full runtime ThreadLimitL1 = 32
+ printf("SPMD with full runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: SPMD with full runtime ThreadLimitL2 = 32
+ printf("SPMD with full runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ // SPMD mode without runtime (target parallel for construct).
+ ThreadLimitL1 = -1;
+ ThreadLimitL2 = -1;
+#pragma omp target parallel for reduction(unique32 \
+ : ThreadLimitL1, ThreadLimitL2) \
+ num_threads(32)
+ for (int I = 0; I < 2; ++I) {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+
+ // CHECK: SPMD without runtime ThreadLimitL1 = 32
+ printf("SPMD without runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: SPMD without runtime ThreadLimitL2 = 32
+ printf("SPMD without runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ return 0;
+}