[libomptarget][devicertl][nfc] Simplify target_atomic abstraction

[libomptarget][devicertl][nfc] Simplify target_atomic abstraction

The __kmpc_atomic_* functions were implemented as a shim around CUDA's atomics
(atomicAdd, atomicInc, atomicMax, atomicExch, atomicCAS), with amdgcn
implementing those same symbols as a shim around gcc-style intrinsics.

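This patch folds target_atomic.h into each target's target_impl.h and, for
amdgcn, also folds hip_atomics.h into target_impl.h so that the
__kmpc_atomic_* functions call the intrinsics directly.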

Further work is likely to be useful here, either changing to OpenMP's atomic
interface or instantiating the templates on the few types actually used in
order to move them into a CUDA/C++ implementation file. This change is mostly
to group the remaining uses of the CUDA API under nvptx's target_impl
abstraction.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95062

GitOrigin-RevId: fbc1dcb946553a3dc923a63288d9275eea86f918
diff --git a/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 8bb395f..8d9abe5 100644
--- a/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -73,14 +73,12 @@
 
 set(h_files
   ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h
   ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
   ${devicertl_base_directory}/common/debug.h
   ${devicertl_base_directory}/common/device_environment.h
   ${devicertl_base_directory}/common/omptarget.h
   ${devicertl_base_directory}/common/omptargeti.h
   ${devicertl_base_directory}/common/state-queue.h
-  ${devicertl_base_directory}/common/target_atomic.h
   ${devicertl_base_directory}/common/state-queuei.h
   ${devicertl_base_directory}/common/support.h)
 
diff --git a/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h b/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
deleted file mode 100644
index 04e80b9..0000000
--- a/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===---- hip_atomics.h - Declarations of hip atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_AMDGCN_HIP_ATOMICS_H
-#define OMPTARGET_AMDGCN_HIP_ATOMICS_H
-
-#include "target_impl.h"
-
-namespace {
-
-template <typename T> DEVICE T atomicAdd(T *address, T val) {
-  return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicMax(T *address, T val) {
-  return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicExch(T *address, T val) {
-  T r;
-  __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
-  return r;
-}
-
-template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
-  (void)__atomic_compare_exchange(address, &compare, &val, false,
-                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
-  return compare;
-}
-
-INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
-  return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
-}
-
-} // namespace
-#endif
diff --git a/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b1e9a1a..6e8a651 100644
--- a/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -29,8 +29,6 @@
 #define SHARED __attribute__((shared))
 #define ALIGN(N) __attribute__((aligned(N)))
 
-#include "hip_atomics.h"
-
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options
 ////////////////////////////////////////////////////////////////////////////////
@@ -127,6 +125,31 @@
 DEVICE unsigned GetWarpId();
 DEVICE unsigned GetLaneId();
 
+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+  return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
+}
+
+INLINE uint32_t __kmpc_atomic_inc(uint32_t *address, uint32_t max) {
+  return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+  return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+  T r;
+  __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
+  return r;
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+  (void)__atomic_compare_exchange(address, &compare, &val, false,
+                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+  return compare;
+}
+
 // Locks
 DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
 DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
diff --git a/libomptarget/deviceRTLs/common/omptargeti.h b/libomptarget/deviceRTLs/common/omptargeti.h
index 14faa59..108724e 100644
--- a/libomptarget/deviceRTLs/common/omptargeti.h
+++ b/libomptarget/deviceRTLs/common/omptargeti.h
@@ -11,8 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "common/target_atomic.h"
-
 ////////////////////////////////////////////////////////////////////////////////
 // Task Descriptor
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/libomptarget/deviceRTLs/common/src/libcall.cu b/libomptarget/deviceRTLs/common/src/libcall.cu
index 69b27f1..f43d74a 100644
--- a/libomptarget/deviceRTLs/common/src/libcall.cu
+++ b/libomptarget/deviceRTLs/common/src/libcall.cu
@@ -13,7 +13,6 @@
 #pragma omp declare target
 
 #include "common/omptarget.h"
-#include "common/target_atomic.h"
 #include "target_impl.h"
 
 EXTERN double omp_get_wtick(void) {
diff --git a/libomptarget/deviceRTLs/common/src/loop.cu b/libomptarget/deviceRTLs/common/src/loop.cu
index dfe6c7f..a3ace09 100644
--- a/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/libomptarget/deviceRTLs/common/src/loop.cu
@@ -15,7 +15,6 @@
 
 #include "common/omptarget.h"
 #include "target_impl.h"
-#include "common/target_atomic.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/libomptarget/deviceRTLs/common/src/reduction.cu b/libomptarget/deviceRTLs/common/src/reduction.cu
index 3a3c445..0cfae1f 100644
--- a/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/libomptarget/deviceRTLs/common/src/reduction.cu
@@ -12,7 +12,6 @@
 #pragma omp declare target
 
 #include "common/omptarget.h"
-#include "common/target_atomic.h"
 #include "target_impl.h"
 
 EXTERN
diff --git a/libomptarget/deviceRTLs/common/state-queuei.h b/libomptarget/deviceRTLs/common/state-queuei.h
index 1bd261f..902eff9 100644
--- a/libomptarget/deviceRTLs/common/state-queuei.h
+++ b/libomptarget/deviceRTLs/common/state-queuei.h
@@ -17,7 +17,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "state-queue.h"
-#include "common/target_atomic.h"
 
 template <typename ElementType, uint32_t SIZE>
 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
diff --git a/libomptarget/deviceRTLs/common/target_atomic.h b/libomptarget/deviceRTLs/common/target_atomic.h
deleted file mode 100644
index 3c905d3..0000000
--- a/libomptarget/deviceRTLs/common/target_atomic.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Declarations of atomic functions provided by each target
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_TARGET_ATOMIC_H
-#define OMPTARGET_TARGET_ATOMIC_H
-
-#include "target_impl.h"
-
-template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
-  return atomicAdd(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
-  return atomicInc(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
-  return atomicMax(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
-  return atomicExch(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
-  return atomicCAS(address, compare, val);
-}
-
-#endif
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index b5ef549..ffc7498 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -13,7 +13,6 @@
 
 #include "target_impl.h"
 #include "common/debug.h"
-#include "common/target_atomic.h"
 
 #include <cuda.h>
 
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index ab9fd16..ba3d331 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -130,6 +130,27 @@
 DEVICE unsigned GetWarpId();
 DEVICE unsigned GetLaneId();
 
+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+  return atomicAdd(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
+  return atomicInc(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+  return atomicMax(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+  return atomicExch(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+  return atomicCAS(address, compare, val);
+}
+
 // Locks
 DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
 DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);