| // REQUIRES: amdgpu-registered-target |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ |
| // RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ |
| // RUN: -target-cpu gfx906 | FileCheck %s |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ |
| // RUN: -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \ |
| // RUN: -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s |
| |
| #include "Inputs/cuda.h" |
| |
| // AMDGPU internalize unused global variables for whole-program compilation |
| // (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then |
| // eliminated by global DCE. If there are invisible unused address space casts |
| // for global variables, these dead users need to be eliminated by global |
| // DCE before internalization. This test makes sure unused global variables |
| // are eliminated. |
| |
| // CHECK-DAG: @v1 |
| __device__ int v1; |
| |
| // CHECK-DAG: @v2 |
| __constant__ int v2; |
| |
| // Check unused device/constant variables are eliminated. |
| |
| // NEGCHK-NOT: @_ZL2v3 |
| constexpr int v3 = 1; |
| |
| // Check managed variables are always kept. |
| |
| // CHECK-DAG: @v4 |
| __managed__ int v4; |
| |
| // Check used device/constant variables are not eliminated. |
| // CHECK-DAG: @u1 |
| __device__ int u1; |
| |
| // CHECK-DAG: @u2 |
| __constant__ int u2; |
| |
| // Check u3 is kept because its address is taken. |
| // CHECK-DAG: @_ZL2u3 |
| constexpr int u3 = 2; |
| |
| // Check u4 is not kept because it is not ODR-use. |
| // NEGCHK-NOT: @_ZL2u4 |
| constexpr int u4 = 3; |
| |
| __device__ int fun1(const int& x); |
| |
| __global__ void kern1(int *x) { |
| *x = u1 + u2 + fun1(u3) + u4; |
| } |