[libomptarget] Build DeviceRTL for amdgpu

Passes the same tests as the current DeviceRTL. Includes the CMake change from
D111987. CI shows a different set of passes/failures than local runs, so this
is committed without the tests enabled by default while that difference is
debugged.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D112227

GitOrigin-RevId: 4d50803ce49ce6b57c4865361c9ba0ad7063b7be
diff --git a/libomptarget/DeviceRTL/CMakeLists.txt b/libomptarget/DeviceRTL/CMakeLists.txt
index a4f9862..419c64d 100644
--- a/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/libomptarget/DeviceRTL/CMakeLists.txt
@@ -226,6 +226,5 @@
 endforeach()
 
 foreach(mcpu ${amdgpu_mcpus})
-  # require D112227 or similar to enable the compilation for amdgpu
-  # compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
+  compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
 endforeach()
diff --git a/libomptarget/DeviceRTL/src/Configuration.cpp b/libomptarget/DeviceRTL/src/Configuration.cpp
index 2b6f20f..f7c61dc 100644
--- a/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -20,9 +20,9 @@
 
 #pragma omp declare target
 
-extern uint32_t __omp_rtl_debug_kind;
+extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
 
-// TOOD: We want to change the name as soon as the old runtime is gone.
+// TODO: We want to change the name as soon as the old runtime is gone.
 DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
     __attribute__((used));
 
diff --git a/libomptarget/DeviceRTL/src/Synchronization.cpp b/libomptarget/DeviceRTL/src/Synchronization.cpp
index d09461a..931dffc 100644
--- a/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -68,8 +68,23 @@
 ///{
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
-  return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
+uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
+  // __builtin_amdgcn_atomic_inc32 should expand to this switch when passed a
+  // runtime memory order, but does not do so yet, so dispatch by hand here.
+  switch (Ordering) {
+  default:
+    __builtin_unreachable(); // Any Ordering without a case below is UB.
+  case __ATOMIC_RELAXED:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
+  }
 }
 
 uint32_t SHARED(namedBarrierTracker);
@@ -126,6 +141,52 @@
   fence::team(__ATOMIC_RELEASE);
 }
 
+// Sema checking of __builtin_amdgcn_fence is aggressive. The intention is to
+// patch clang so the builtin is usable within a template environment and a
+// runtime memory order is expanded to this switch within clang/llvm.
+void fenceTeam(int Ordering) {
+  switch (Ordering) { // Each case fences at "workgroup" scope.
+  default:
+    __builtin_unreachable(); // Any Ordering without a case below is UB.
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+  }
+}
+void fenceKernel(int Ordering) {
+  switch (Ordering) { // Each case fences at "agent" scope.
+  default:
+    __builtin_unreachable(); // Any Ordering without a case below is UB.
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+  }
+}
+void fenceSystem(int Ordering) {
+  switch (Ordering) { // Each case passes "" as the scope (system, presumably).
+  default:
+    __builtin_unreachable(); // Any Ordering without a case below is UB.
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+  }
+}
+
 void syncWarp(__kmpc_impl_lanemask_t) {
   // AMDGCN doesn't need to sync threads in a warp
 }
@@ -133,11 +194,12 @@
 void syncThreads() { __builtin_amdgcn_s_barrier(); }
 void syncThreadsAligned() { syncThreads(); }
 
-void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
-
-void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
-
-void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
+// TODO: Don't have wavefront lane locks (possibly can't); each stub traps.
+void unsetLock(omp_lock_t *) { __builtin_trap(); }
+int testLock(omp_lock_t *) { __builtin_trap(); }
+void initLock(omp_lock_t *) { __builtin_trap(); }
+void destroyLock(omp_lock_t *) { __builtin_trap(); }
+void setLock(omp_lock_t *) { __builtin_trap(); }
 
 #pragma omp end declare variant
 ///}
diff --git a/libomptarget/test/mapping/data_member_ref.cpp b/libomptarget/test/mapping/data_member_ref.cpp
index ec23890..dff5987 100644
--- a/libomptarget/test/mapping/data_member_ref.cpp
+++ b/libomptarget/test/mapping/data_member_ref.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
diff --git a/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
index 7edd7db..7825d98 100644
--- a/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
+++ b/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <cstdio>
 #include <cstdlib>
diff --git a/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp b/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
index c8986dd..bf2addd 100644
--- a/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
+++ b/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <cstdio>
 #include <cstdlib>
diff --git a/libomptarget/test/mapping/delete_inf_refcount.c b/libomptarget/test/mapping/delete_inf_refcount.c
index cd67ddd..c6d2bda 100644
--- a/libomptarget/test/mapping/delete_inf_refcount.c
+++ b/libomptarget/test/mapping/delete_inf_refcount.c
@@ -2,6 +2,7 @@
 
 // fails with error message 'Unable to generate target entries' on amdgcn
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
diff --git a/libomptarget/test/mapping/lambda_by_value.cpp b/libomptarget/test/mapping/lambda_by_value.cpp
index 6e35324..9cd3833 100644
--- a/libomptarget/test/mapping/lambda_by_value.cpp
+++ b/libomptarget/test/mapping/lambda_by_value.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <stdint.h>
diff --git a/libomptarget/test/mapping/ompx_hold/struct.c b/libomptarget/test/mapping/ompx_hold/struct.c
index 2a0626b..fc63e86 100644
--- a/libomptarget/test/mapping/ompx_hold/struct.c
+++ b/libomptarget/test/mapping/ompx_hold/struct.c
@@ -3,6 +3,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/libomptarget/test/mapping/ptr_and_obj_motion.c b/libomptarget/test/mapping/ptr_and_obj_motion.c
index ddea2fb..4852561 100644
--- a/libomptarget/test/mapping/ptr_and_obj_motion.c
+++ b/libomptarget/test/mapping/ptr_and_obj_motion.c
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
diff --git a/libomptarget/test/mapping/reduction_implicit_map.cpp b/libomptarget/test/mapping/reduction_implicit_map.cpp
index 040accd..24b97bd 100644
--- a/libomptarget/test/mapping/reduction_implicit_map.cpp
+++ b/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
diff --git a/libomptarget/test/offloading/bug49021.cpp b/libomptarget/test/offloading/bug49021.cpp
index 521adf2..1e456af 100644
--- a/libomptarget/test/offloading/bug49021.cpp
+++ b/libomptarget/test/offloading/bug49021.cpp
@@ -2,6 +2,7 @@
 
 // Wrong results on amdgcn
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <iostream>
 
diff --git a/libomptarget/test/offloading/bug49334.cpp b/libomptarget/test/offloading/bug49334.cpp
index 0ba0815..4907d32 100644
--- a/libomptarget/test/offloading/bug49334.cpp
+++ b/libomptarget/test/offloading/bug49334.cpp
@@ -2,7 +2,7 @@
 
 // Currently hangs on amdgpu
 // UNSUPPORTED: amdgcn-amd-amdhsa
-
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 // UNSUPPORTED: x86_64-pc-linux-gnu
 
 #include <cassert>
diff --git a/libomptarget/test/offloading/bug50022.cpp b/libomptarget/test/offloading/bug50022.cpp
index a520442..ca1f0e1 100644
--- a/libomptarget/test/offloading/bug50022.cpp
+++ b/libomptarget/test/offloading/bug50022.cpp
@@ -1,6 +1,7 @@
 // RUN: %libomptarget-compilexx-and-run-generic
 
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <cassert>
 #include <iostream>
diff --git a/libomptarget/test/offloading/global_constructor.cpp b/libomptarget/test/offloading/global_constructor.cpp
index d73fe1a..ae602df 100644
--- a/libomptarget/test/offloading/global_constructor.cpp
+++ b/libomptarget/test/offloading/global_constructor.cpp
@@ -2,6 +2,7 @@
 
 // Fails in DAGToDAG on an address space problem
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <cmath>
 #include <cstdio>
diff --git a/libomptarget/test/offloading/host_as_target.c b/libomptarget/test/offloading/host_as_target.c
index c25a480..1e7cdef 100644
--- a/libomptarget/test/offloading/host_as_target.c
+++ b/libomptarget/test/offloading/host_as_target.c
@@ -9,6 +9,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
diff --git a/libomptarget/test/unified_shared_memory/api.c b/libomptarget/test/unified_shared_memory/api.c
index 7282491..fcb5318 100644
--- a/libomptarget/test/unified_shared_memory/api.c
+++ b/libomptarget/test/unified_shared_memory/api.c
@@ -4,6 +4,7 @@
 
 // Fails on amdgcn with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
diff --git a/libomptarget/test/unified_shared_memory/close_enter_exit.c b/libomptarget/test/unified_shared_memory/close_enter_exit.c
index e159ed8..62555d2 100644
--- a/libomptarget/test/unified_shared_memory/close_enter_exit.c
+++ b/libomptarget/test/unified_shared_memory/close_enter_exit.c
@@ -5,6 +5,7 @@
 
 // Fails on amdgcn with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/libomptarget/test/unified_shared_memory/close_modifier.c b/libomptarget/test/unified_shared_memory/close_modifier.c
index 6667fd8..98f1322 100644
--- a/libomptarget/test/unified_shared_memory/close_modifier.c
+++ b/libomptarget/test/unified_shared_memory/close_modifier.c
@@ -5,6 +5,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/libomptarget/test/unified_shared_memory/shared_update.c b/libomptarget/test/unified_shared_memory/shared_update.c
index ab9b3e8..2b90cf3 100644
--- a/libomptarget/test/unified_shared_memory/shared_update.c
+++ b/libomptarget/test/unified_shared_memory/shared_update.c
@@ -4,6 +4,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>