[OPENMP][NVPTX]Perform memory flush if number of threads to sync is 1 or less.

Summary:
According to the OpenMP standard, barrier operation must perform
implicit flush operation. Currently, if there is only one thread in the
team, barrier does not flush the memory. Patch fixes this problem.

Reviewers: grokos, gtbercea, kkwli0

Subscribers: guansong, jdoerfert, openmp-commits, caomhin

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D62398

git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@367024 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 688420e..4607d6a 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -62,6 +62,9 @@
         // Barrier #1 is for synchronization among active threads.
         named_sync(L1_BARRIER, threads);
       }
+    } else {
+      // Still need to flush the memory per the standard.
+      __kmpc_flush(loc_ref);
     } // numberOfActiveOMPThreads > 1
     PRINT0(LD_SYNC, "completed kmpc_barrier\n");
   }
diff --git a/libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c b/libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c
new file mode 100644
index 0000000..7c70771
--- /dev/null
+++ b/libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c
@@ -0,0 +1,37 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+  int data, out, flag = 0;
+#pragma omp target teams num_teams(2) map(tofrom                               \
+                                          : out) map(to                        \
+                                                     : data, flag)             \
+    thread_limit(1)
+#pragma omp parallel num_threads(1)
+  {
+    if (omp_get_team_num() == 0) {
+      /* Write to the data buffer that will be read by thread in team 1 */
+      data = 42;
+/* Flush data to thread in team 1 */
+#pragma omp barrier
+      /* Set flag to release thread in team 1 */
+#pragma omp atomic write
+      flag = 1;
+    } else if (omp_get_team_num() == 1) {
+      /* Loop until we see the update to the flag */
+      int val;
+      do {
+#pragma omp atomic read
+        val = flag;
+      } while (val < 1);
+      out = data;
+#pragma omp barrier
+    }
+  }
+  // CHECK: out=42.
+  /* Value of out will be 42 */
+  printf("out=%d.\n", out);
+  return !(out == 42);
+}