[CUDA][HIP] Mark device var used by host only

Add device variables to llvm.compiler.used if they are
ODR-used by either host or device functions.

This is necessary to prevent them from being
eliminated by whole-program optimization
where the compiler has no way to know a device
variable is used by some host code.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D98814

GitOrigin-RevId: d5c0f00e216aa6797499bb4c8aacac930d8a819b
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index b224de7..27fe048 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -1089,6 +1089,28 @@
 llvm::Function *CGNVCUDARuntime::finalizeModule() {
   if (CGM.getLangOpts().CUDAIsDevice) {
     transformManagedVars();
+
+    // Mark ODR-used device variables as compiler used to prevent it from being
+    // eliminated by optimization. This is necessary for device variables
+    // ODR-used by host functions. Sema correctly marks them as ODR-used no
+    // matter whether they are ODR-used by device or host functions.
+    //
+    // We do not need to do this if the variable has used attribute since it
+    // has already been added.
+    //
+    // Static device variables have been externalized at this point, therefore
+    // variables with LLVM private or internal linkage need not be added.
+    for (auto &&Info : DeviceVars) {
+      auto Kind = Info.Flags.getKind();
+      if (!Info.Var->isDeclaration() &&
+          !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
+          (Kind == DeviceVarFlags::Variable ||
+           Kind == DeviceVarFlags::Surface ||
+           Kind == DeviceVarFlags::Texture) &&
+          Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
+        CGM.addCompilerUsedGlobal(Info.Var);
+      }
+    }
     return nullptr;
   }
   return makeModuleCtorFunction();
diff --git a/test/CodeGenCUDA/host-used-device-var.cu b/test/CodeGenCUDA/host-used-device-var.cu
new file mode 100644
index 0000000..fd501ed
--- /dev/null
+++ b/test/CodeGenCUDA/host-used-device-var.cu
@@ -0,0 +1,47 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// Check device variables used by neither host nor device functioins are not kept.
+
+// CHECK-NOT: @v1
+__device__ int v1;
+
+// CHECK-NOT: @v2
+__constant__ int v2;
+
+// CHECK-NOT: @_ZL2v3
+static __device__ int v3;
+
+// Check device variables used by host functions are kept.
+
+// CHECK-DAG: @u1
+__device__ int u1;
+
+// CHECK-DAG: @u2
+__constant__ int u2;
+
+// Check host-used static device var is in llvm.compiler.used.
+// CHECK-DAG: @_ZL2u3
+static __device__ int u3;
+
+// Check device-used static device var is emitted but is not in llvm.compiler.used.
+// CHECK-DAG: @_ZL2u4
+static __device__ int u4;
+
+// Check device variables with used attribute are always kept.
+// CHECK-DAG: @u5
+__device__ __attribute__((used)) int u5;
+
+int fun1() {
+  return u1 + u2 + u3;
+}
+
+__global__ void kern1(int **x) {
+  *x = &u4;
+}
+// Check the exact list of variables to ensure @_ZL2u4 is not among them.
+// CHECK: @llvm.compiler.used = {{[^@]*}} @_ZL2u3 {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5