[OpenMP][DeviceRTL] Implement libc function `memcmp`

We will add some simple implementation of libc functions starting from
this patch, and the first one is `memcmp`, which is reported in #56929. Note that
`malloc` and `free` are not included in this patch because of the use of
`declare variant`. In the near future we will implement the two functions w/o
using any vendor provided function.

This fixes #56929.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D131182

GitOrigin-RevId: db5a2afa62428a461f17de30fc410c20d12fe201
diff --git a/libomptarget/DeviceRTL/CMakeLists.txt b/libomptarget/DeviceRTL/CMakeLists.txt
index ce66214..85cf718 100644
--- a/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/libomptarget/DeviceRTL/CMakeLists.txt
@@ -90,6 +90,7 @@
   ${include_directory}/Configuration.h
   ${include_directory}/Debug.h
   ${include_directory}/Interface.h
+  ${include_directory}/LibC.h
   ${include_directory}/Mapping.h
   ${include_directory}/State.h
   ${include_directory}/Synchronization.h
@@ -101,6 +102,7 @@
   ${source_directory}/Configuration.cpp
   ${source_directory}/Debug.cpp
   ${source_directory}/Kernel.cpp
+  ${source_directory}/LibC.cpp
   ${source_directory}/Mapping.cpp
   ${source_directory}/Misc.cpp
   ${source_directory}/Parallelism.cpp
diff --git a/libomptarget/DeviceRTL/include/Debug.h b/libomptarget/DeviceRTL/include/Debug.h
index 18c43f3..128572d 100644
--- a/libomptarget/DeviceRTL/include/Debug.h
+++ b/libomptarget/DeviceRTL/include/Debug.h
@@ -13,6 +13,7 @@
 #define OMPTARGET_DEVICERTL_DEBUG_H
 
 #include "Configuration.h"
+#include "LibC.h"
 
 /// Assertion
 ///
@@ -33,14 +34,6 @@
 
 ///}
 
-/// Print
-/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
-/// {
-
-extern "C" {
-int printf(const char *format, ...);
-}
-
 #define PRINTF(fmt, ...) (void)printf(fmt, ##__VA_ARGS__);
 #define PRINT(str) PRINTF("%s", str)
 
diff --git a/libomptarget/DeviceRTL/include/LibC.h b/libomptarget/DeviceRTL/include/LibC.h
new file mode 100644
index 0000000..87eed20
--- /dev/null
+++ b/libomptarget/DeviceRTL/include/LibC.h
@@ -0,0 +1,24 @@
+//===--------- LibC.h - Simple implementation of libc functions --- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_LIBC_H
+#define OMPTARGET_LIBC_H
+
+#include "Types.h"
+
+extern "C" {
+
+int memcmp(const void *lhs, const void *rhs, size_t count);
+
+int printf(const char *format, ...);
+}
+
+#endif
diff --git a/libomptarget/DeviceRTL/include/Types.h b/libomptarget/DeviceRTL/include/Types.h
index 54a9ee1..8a0107d 100644
--- a/libomptarget/DeviceRTL/include/Types.h
+++ b/libomptarget/DeviceRTL/include/Types.h
@@ -32,6 +32,7 @@
 using uint32_t = unsigned int;
 using int64_t = long;
 using uint64_t = unsigned long;
+using size_t = decltype(sizeof(char));
 
 static_assert(sizeof(int8_t) == 1, "type size mismatch");
 static_assert(sizeof(uint8_t) == 1, "type size mismatch");
diff --git a/libomptarget/DeviceRTL/src/Debug.cpp b/libomptarget/DeviceRTL/src/Debug.cpp
index 45e08fa..c3ed85a 100644
--- a/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/libomptarget/DeviceRTL/src/Debug.cpp
@@ -29,33 +29,6 @@
          assertion);
   __builtin_trap();
 }
-
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
-}
-
-#pragma omp begin declare variant match(                                       \
-    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-int32_t vprintf(const char *, void *);
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
-  return vprintf(Format, Arguments);
-}
-} // namespace impl
-#pragma omp end declare variant
-
-// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
-  return -1;
-}
-} // namespace impl
-#pragma omp end declare variant
-
-int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
-  return impl::omp_vprintf(Format, Arguments, Size);
-}
 }
 
 /// Current indentation level for the function trace. Only accessed by thread 0.
diff --git a/libomptarget/DeviceRTL/src/LibC.cpp b/libomptarget/DeviceRTL/src/LibC.cpp
new file mode 100644
index 0000000..0f7b656
--- /dev/null
+++ b/libomptarget/DeviceRTL/src/LibC.cpp
@@ -0,0 +1,55 @@
+//===------- LibC.c - Simple implementation of libc functions ----- C -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LibC.h"
+
+#pragma omp begin declare target device_type(nohost)
+
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
+}
+
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+extern "C" int32_t vprintf(const char *, void *);
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+  return vprintf(Format, Arguments);
+}
+} // namespace impl
+#pragma omp end declare variant
+
+// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+  return -1;
+}
+} // namespace impl
+#pragma omp end declare variant
+
+extern "C" {
+
+int memcmp(const void *lhs, const void *rhs, size_t count) {
+  auto *L = reinterpret_cast<const unsigned char *>(lhs);
+  auto *R = reinterpret_cast<const unsigned char *>(rhs);
+
+  for (size_t I = 0; I < count; ++I)
+    if (L[I] != R[I])
+      return (int)L[I] - (int)R[I];
+
+  return 0;
+}
+
+/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
+  return impl::omp_vprintf(Format, Arguments, Size);
+}
+}
+
+#pragma omp end declare target
diff --git a/libomptarget/DeviceRTL/src/exports b/libomptarget/DeviceRTL/src/exports
index 58cbeb1..49e5ea2 100644
--- a/libomptarget/DeviceRTL/src/exports
+++ b/libomptarget/DeviceRTL/src/exports
@@ -1,3 +1,6 @@
 omp_*
 *llvm_*
 __kmpc_*
+
+memcmp
+printf
diff --git a/libomptarget/test/offloading/test_libc.cpp b/libomptarget/test/offloading/test_libc.cpp
new file mode 100644
index 0000000..cfccf47
--- /dev/null
+++ b/libomptarget/test/offloading/test_libc.cpp
@@ -0,0 +1,27 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+#include <algorithm>
+
+extern "C" int printf(const char *, ...);
+
+// std::equal is lowered to libc function memcmp.
+void test_memcpy() {
+#pragma omp target
+  {
+    int x[2] = {0, 0};
+    int y[2] = {0, 0};
+    int z[2] = {0, 1};
+    bool eq1 = std::equal(x, x + 2, y);
+    bool eq2 = std::equal(x, x + 2, z);
+    bool r = eq1 && !eq2;
+    printf("memcmp: %s\n", r ? "PASS" : "FAIL");
+  }
+}
+
+int main(int argc, char *argv[]) {
+  test_memcpy();
+
+  return 0;
+}
+
+// CHECK: memcmp: PASS