[HIP] Support hipRTC in header

hipRTC compiles HIP device code at run time. Since the system may not
have development tools installed, when a HIP program is compiled through
hipRTC, there is no standard C or C++ header available. As such, the HIP
headers should not depend on standard C or C++ headers when used
with hipRTC. Basically when hipRTC is used, HIP headers only provides
definitions of HIP device API functions. This is in line with what nvRTC does.

This patch adds support of hipRTC to HIP headers in clang. Basically hipRTC
defines a macro __HIPCC_RTC__ when compile HIP code at run time. When
this macro is defined, HIP headers do not include standard C/C++ headers.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D100652

GitOrigin-RevId: 6823af0ca858b54e09e5be61a19d067ccd0bd6b7
diff --git a/lib/Headers/__clang_hip_cmath.h b/lib/Headers/__clang_hip_cmath.h
index 18871e6..632d46e 100644
--- a/lib/Headers/__clang_hip_cmath.h
+++ b/lib/Headers/__clang_hip_cmath.h
@@ -14,6 +14,7 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+#if !defined(__HIPCC_RTC__)
 #if defined(__cplusplus)
 #include <limits>
 #include <type_traits>
@@ -21,6 +22,7 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
+#endif // __HIPCC_RTC__
 
 #pragma push_macro("__DEVICE__")
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
diff --git a/lib/Headers/__clang_hip_math.h b/lib/Headers/__clang_hip_math.h
index 14d91c6..35cf0ad 100644
--- a/lib/Headers/__clang_hip_math.h
+++ b/lib/Headers/__clang_hip_math.h
@@ -13,11 +13,13 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+#if !defined(__HIPCC_RTC__)
 #if defined(__cplusplus)
 #include <algorithm>
 #endif
 #include <limits.h>
 #include <stdint.h>
+#endif // __HIPCC_RTC__
 
 #pragma push_macro("__DEVICE__")
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
@@ -1260,6 +1262,7 @@
 __DEVICE__
 double min(double __x, double __y) { return fmin(__x, __y); }
 
+#if !defined(__HIPCC_RTC__)
 __host__ inline static int min(int __arg1, int __arg2) {
   return std::min(__arg1, __arg2);
 }
@@ -1267,6 +1270,7 @@
 __host__ inline static int max(int __arg1, int __arg2) {
   return std::max(__arg1, __arg2);
 }
+#endif // __HIPCC_RTC__
 #endif
 
 #pragma pop_macro("__DEVICE__")
diff --git a/lib/Headers/__clang_hip_runtime_wrapper.h b/lib/Headers/__clang_hip_runtime_wrapper.h
index 4fd8f23..8ee5566 100644
--- a/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -18,9 +18,27 @@
 
 #if __HIP__
 
+#if !defined(__HIPCC_RTC__)
 #include <cmath>
 #include <cstdlib>
 #include <stdlib.h>
+#else
+typedef __SIZE_TYPE__ size_t;
+// Define macros which are needed to declare HIP device API's without standard
+// C/C++ headers. This is for readability so that these API's can be written
+// the same way as non-hipRTC use case. These macros need to be popped so that
+// they do not pollute users' name space.
+#pragma push_macro("NULL")
+#pragma push_macro("uint32_t")
+#pragma push_macro("uint64_t")
+#pragma push_macro("CHAR_BIT")
+#pragma push_macro("INT_MAX")
+#define NULL (void *)0
+#define uint32_t __UINT32_TYPE__
+#define uint64_t __UINT64_TYPE__
+#define CHAR_BIT __CHAR_BIT__
+#define INT_MAX __INTMAX_MAX__
+#endif // __HIPCC_RTC__
 
 #define __host__ __attribute__((host))
 #define __device__ __attribute__((device))
@@ -54,6 +72,7 @@
 #include <__clang_hip_libdevice_declares.h>
 #include <__clang_hip_math.h>
 
+#if !defined(__HIPCC_RTC__)
 #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
 #include <__clang_cuda_math_forward_declares.h>
 #include <__clang_hip_cmath.h>
@@ -62,9 +81,16 @@
 #include <algorithm>
 #include <complex>
 #include <new>
+#endif // __HIPCC_RTC__
 #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
 
 #define __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ 1
-
+#if defined(__HIPCC_RTC__)
+#pragma pop_macro("NULL")
+#pragma pop_macro("uint32_t")
+#pragma pop_macro("uint64_t")
+#pragma pop_macro("CHAR_BIT")
+#pragma pop_macro("INT_MAX")
+#endif // __HIPCC_RTC__
 #endif // __HIP__
 #endif // __CLANG_HIP_RUNTIME_WRAPPER_H__
diff --git a/test/Headers/hip-header.hip b/test/Headers/hip-header.hip
new file mode 100644
index 0000000..943254b
--- /dev/null
+++ b/test/Headers/hip-header.hip
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D__HIPCC_RTC__ | FileCheck %s
+
+// expected-no-diagnostics
+
+// CHECK-LABEL: amdgpu_kernel void @_Z4kernPff
+__global__ void kern(float *x, float y) {
+  *x = sin(y);
+}
+
+// CHECK-LABEL: define{{.*}} i64 @_Z11test_size_tv
+// CHEC: ret i64 8
+__device__ size_t test_size_t() {
+  return sizeof(size_t);
+}
diff --git a/test/lit.cfg.py b/test/lit.cfg.py
index 21b6745..63ba816 100644
--- a/test/lit.cfg.py
+++ b/test/lit.cfg.py
@@ -25,7 +25,7 @@
 config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.c', '.cpp', '.i', '.cppm', '.m', '.mm', '.cu',
+config.suffixes = ['.c', '.cpp', '.i', '.cppm', '.m', '.mm', '.cu', '.hip',
                    '.ll', '.cl', '.clcpp', '.s', '.S', '.modulemap', '.test', '.rs', '.ifs']
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'