[OpenMP] Lower printf to __llvm_omp_vprintf

Extension of D112504. Lower amdgpu printf to `__llvm_omp_vprintf`, which
takes the same `const char *`, `void *` arguments as CUDA `vprintf` and is
additionally passed the size of the `void *` alloca. That size will be needed
by a non-stub implementation of `__llvm_omp_vprintf` for amdgpu.

This replaces the amdgpu link error on any printf in a target region with code
that compiles silently but does not print anything to stdout.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D112680

GitOrigin-RevId: 27177b82d4ca4451f288168fc1e06c0736afbdaf
diff --git a/libomptarget/DeviceRTL/include/Debug.h b/libomptarget/DeviceRTL/include/Debug.h
index ee1b485..f66d566 100644
--- a/libomptarget/DeviceRTL/include/Debug.h
+++ b/libomptarget/DeviceRTL/include/Debug.h
@@ -34,23 +34,15 @@
 ///}
 
 /// Print
-/// TODO: For now we have to use macros to guard the code because Clang lowers
-/// `printf` to different function calls on NVPTX and AMDGCN platforms, and it
-/// doesn't work for AMDGCN. After it can work on AMDGCN, we will remove the
-/// macro.
+/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
 /// {
 
-#ifndef __AMDGCN__
 extern "C" {
 int printf(const char *format, ...);
 }
 
-#define PRINTF(fmt, ...) (void)printf(fmt, __VA_ARGS__);
+#define PRINTF(fmt, ...) (void)printf(fmt, ##__VA_ARGS__);
 #define PRINT(str) PRINTF("%s", str)
-#else
-#define PRINTF(fmt, ...)
-#define PRINT(str)
-#endif
 
 ///}
 
diff --git a/libomptarget/DeviceRTL/src/Debug.cpp b/libomptarget/DeviceRTL/src/Debug.cpp
index 4fbd2be..fc9b2ed 100644
--- a/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/libomptarget/DeviceRTL/src/Debug.cpp
@@ -29,6 +29,29 @@
          assertion);
   __builtin_trap();
 }
+
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+int32_t vprintf(const char *, void *);
+namespace impl {
+static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+  return vprintf(Format, Arguments);
+}
+} // namespace impl
+#pragma omp end declare variant
+
+// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+namespace impl {
+static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+  return -1;
+}
+} // namespace impl
+#pragma omp end declare variant
+
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
+  return impl::omp_vprintf(Format, Arguments, Size);
+}
 }
 
 /// Current indentation level for the function trace. Only accessed by thread 0.
diff --git a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 3747c87..d5161da 100644
--- a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -187,6 +187,11 @@
 }
 __attribute__((weak)) EXTERN void __kmpc_impl_free(void *) {}
 
+EXTERN
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+  return -1;
+}
+
 EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
   lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
   hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index ee58369..c442596 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -184,9 +184,15 @@
 extern "C" {
 void *malloc(size_t);
 void free(void *);
+int32_t vprintf(const char *, void *);
 }
 
 EXTERN void *__kmpc_impl_malloc(size_t x) { return malloc(x); }
 EXTERN void __kmpc_impl_free(void *x) { free(x); }
 
+EXTERN int32_t __llvm_omp_vprintf(const char *Format, void *Arguments,
+                                  uint32_t) {
+  return vprintf(Format, Arguments);
+}
+
 #pragma omp end declare target
diff --git a/libomptarget/test/mapping/data_member_ref.cpp b/libomptarget/test/mapping/data_member_ref.cpp
index dff5987..5ac1a0b 100644
--- a/libomptarget/test/mapping/data_member_ref.cpp
+++ b/libomptarget/test/mapping/data_member_ref.cpp
@@ -1,6 +1,6 @@
 // RUN: %libomptarget-compilexx-run-and-check-generic
 
-// amdgcn does not have printf definition
+// Wrong results on amdgpu
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
index 7825d98..00d8572 100644
--- a/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
+++ b/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
@@ -1,6 +1,6 @@
 // RUN: %libomptarget-compilexx-run-and-check-generic
 
-// amdgcn does not have printf definition
+// Wrong results on amdgpu
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp b/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
index bf2addd..eadf460 100644
--- a/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
+++ b/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
@@ -1,6 +1,6 @@
 // RUN: %libomptarget-compilexx-run-and-check-generic
 
-// amdgcn does not have printf definition
+// Wrong results on amdgpu
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/mapping/lambda_by_value.cpp b/libomptarget/test/mapping/lambda_by_value.cpp
index 9cd3833..711decb 100644
--- a/libomptarget/test/mapping/lambda_by_value.cpp
+++ b/libomptarget/test/mapping/lambda_by_value.cpp
@@ -1,6 +1,6 @@
 // RUN: %libomptarget-compilexx-run-and-check-generic
 
-// amdgcn does not have printf definition
+// Wrong results on amdgpu
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/mapping/ompx_hold/struct.c b/libomptarget/test/mapping/ompx_hold/struct.c
index fc63e86..450b601 100644
--- a/libomptarget/test/mapping/ompx_hold/struct.c
+++ b/libomptarget/test/mapping/ompx_hold/struct.c
@@ -1,7 +1,7 @@
 // RUN: %libomptarget-compile-generic -fopenmp-extensions
 // RUN: %libomptarget-run-generic | %fcheck-generic -strict-whitespace
 
-// amdgcn does not have printf definition
+// Wrong results on amdgpu
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/mapping/ptr_and_obj_motion.c b/libomptarget/test/mapping/ptr_and_obj_motion.c
index 4852561..a975ed6 100644
--- a/libomptarget/test/mapping/ptr_and_obj_motion.c
+++ b/libomptarget/test/mapping/ptr_and_obj_motion.c
@@ -1,9 +1,5 @@
 // RUN: %libomptarget-compile-run-and-check-generic
 
-// amdgcn does not have printf definition
-// XFAIL: amdgcn-amd-amdhsa
-// XFAIL: amdgcn-amd-amdhsa-newRTL
-
 #include <stdio.h>
 
 typedef struct {
diff --git a/libomptarget/test/mapping/reduction_implicit_map.cpp b/libomptarget/test/mapping/reduction_implicit_map.cpp
index 24b97bd..429bb68 100644
--- a/libomptarget/test/mapping/reduction_implicit_map.cpp
+++ b/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -1,9 +1,5 @@
 // RUN: %libomptarget-compilexx-run-and-check-generic
 
-// amdgcn does not have printf definition
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
-
 #include <stdio.h>
 
 void sum(int* input, int size, int* output)
diff --git a/libomptarget/test/offloading/bug49021.cpp b/libomptarget/test/offloading/bug49021.cpp
index 1e456af..42e5573 100644
--- a/libomptarget/test/offloading/bug49021.cpp
+++ b/libomptarget/test/offloading/bug49021.cpp
@@ -1,8 +1,7 @@
 // RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
 
-// Wrong results on amdgcn
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
+// Wrong results on amdgpu
+// XFAIL: amdgcn-amd-amdhsa
 
 #include <iostream>
 
diff --git a/libomptarget/test/offloading/bug50022.cpp b/libomptarget/test/offloading/bug50022.cpp
index ca1f0e1..54ce06e 100644
--- a/libomptarget/test/offloading/bug50022.cpp
+++ b/libomptarget/test/offloading/bug50022.cpp
@@ -1,8 +1,5 @@
 // RUN: %libomptarget-compilexx-and-run-generic
 
-// UNSUPPORTED: amdgcn-amd-amdhsa
-// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
-
 #include <cassert>
 #include <iostream>
 #include <stdexcept>
diff --git a/libomptarget/test/offloading/host_as_target.c b/libomptarget/test/offloading/host_as_target.c
index 1e7cdef..1fa7116 100644
--- a/libomptarget/test/offloading/host_as_target.c
+++ b/libomptarget/test/offloading/host_as_target.c
@@ -7,7 +7,7 @@
 
 // RUN: %libomptarget-compile-run-and-check-generic
 
-// amdgcn does not have printf definition
+// amdgpu does not have a working printf definition
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/unified_shared_memory/api.c b/libomptarget/test/unified_shared_memory/api.c
index fcb5318..f2882f2 100644
--- a/libomptarget/test/unified_shared_memory/api.c
+++ b/libomptarget/test/unified_shared_memory/api.c
@@ -2,7 +2,7 @@
 // XFAIL: nvptx64-nvidia-cuda
 // XFAIL: nvptx64-nvidia-cuda-newRTL
 
-// Fails on amdgcn with error: GPU Memory Error
+// Fails on amdgpu with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/unified_shared_memory/close_enter_exit.c b/libomptarget/test/unified_shared_memory/close_enter_exit.c
index 62555d2..7f1abe3 100644
--- a/libomptarget/test/unified_shared_memory/close_enter_exit.c
+++ b/libomptarget/test/unified_shared_memory/close_enter_exit.c
@@ -3,7 +3,7 @@
 // REQUIRES: unified_shared_memory
 // UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
 
-// Fails on amdgcn with error: GPU Memory Error
+// Fails on amdgpu with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
 // XFAIL: amdgcn-amd-amdhsa-newRTL
 
diff --git a/libomptarget/test/unified_shared_memory/close_modifier.c b/libomptarget/test/unified_shared_memory/close_modifier.c
index 98f1322..ce368a3 100644
--- a/libomptarget/test/unified_shared_memory/close_modifier.c
+++ b/libomptarget/test/unified_shared_memory/close_modifier.c
@@ -3,9 +3,9 @@
 // REQUIRES: unified_shared_memory
 // UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
 
-// amdgcn does not have printf definition
-// XFAIL: amdgcn-amd-amdhsa
-// XFAIL: amdgcn-amd-amdhsa-newRTL
+// amdgpu runtime crash
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/libomptarget/test/unified_shared_memory/shared_update.c b/libomptarget/test/unified_shared_memory/shared_update.c
index 2b90cf3..b211d33 100644
--- a/libomptarget/test/unified_shared_memory/shared_update.c
+++ b/libomptarget/test/unified_shared_memory/shared_update.c
@@ -2,9 +2,8 @@
 
 // REQUIRES: unified_shared_memory
 
-// amdgcn does not have printf definition
-// XFAIL: amdgcn-amd-amdhsa
-// XFAIL: amdgcn-amd-amdhsa-newRTL
+// amdgpu runtime crash
+// UNSUPPORTED: amdgcn-amd-amdhsa
 
 #include <stdio.h>
 #include <omp.h>