[libomptarget][amdgcn] Tolerate deadstripped device_state variable

[libomptarget][amdgcn] Tolerate deadstripped device_state variable

The device_state variable may have been deadstripped. Similar to
device_environment, leave detection of missing but used symbol to loader.

Reviewed By: pdhaliwal

Differential Revision: https://reviews.llvm.org/D96330

GitOrigin-RevId: 56c446a878b1fa98774dcee1cb09135b8f286e59
diff --git a/libomptarget/plugins/amdgpu/src/rtl.cpp b/libomptarget/plugins/amdgpu/src/rtl.cpp
index 0d41bde..ec79239 100644
--- a/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1200,6 +1200,7 @@
   {
     // the device_State array is either large value in bss or a void* that
     // needs to be assigned to a pointer to an array of size device_state_bytes
+    // If absent, it has been deadstripped and needs no setup.
 
     void *state_ptr;
     uint32_t state_ptr_size;
@@ -1208,52 +1209,54 @@
         &state_ptr, &state_ptr_size);
 
     if (err != ATMI_STATUS_SUCCESS) {
-      fprintf(stderr, "failed to find device_state symbol\n");
-      return NULL;
-    }
-
-    if (state_ptr_size < sizeof(void *)) {
-      fprintf(stderr, "unexpected size of state_ptr %u != %zu\n",
-              state_ptr_size, sizeof(void *));
-      return NULL;
-    }
-
-    // if it's larger than a void*, assume it's a bss array and no further
-    // initialization is required. Only try to set up a pointer for
-    // sizeof(void*)
-    if (state_ptr_size == sizeof(void *)) {
-      uint64_t device_State_bytes =
-          get_device_State_bytes((char *)image->ImageStart, img_size);
-      if (device_State_bytes == 0) {
+      DP("No device_state symbol found, skipping initialization\n");
+    } else {
+      if (state_ptr_size < sizeof(void *)) {
+        DP("unexpected size of state_ptr %u != %zu\n", state_ptr_size,
+           sizeof(void *));
         return NULL;
       }
 
-      auto &dss = DeviceInfo.deviceStateStore[device_id];
-      if (dss.first.get() == nullptr) {
-        assert(dss.second == 0);
-        void *ptr = NULL;
-        atmi_status_t err =
-            atmi_calloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
-        if (err != ATMI_STATUS_SUCCESS) {
-          fprintf(stderr, "Failed to allocate device_state array\n");
+      // if it's larger than a void*, assume it's a bss array and no further
+      // initialization is required. Only try to set up a pointer for
+      // sizeof(void*)
+      if (state_ptr_size == sizeof(void *)) {
+        uint64_t device_State_bytes =
+            get_device_State_bytes((char *)image->ImageStart, img_size);
+        if (device_State_bytes == 0) {
+          DP("Can't initialize device_State, missing size information\n");
           return NULL;
         }
-        dss = {std::unique_ptr<void, RTLDeviceInfoTy::atmiFreePtrDeletor>{ptr},
-               device_State_bytes};
-      }
 
-      void *ptr = dss.first.get();
-      if (device_State_bytes != dss.second) {
-        fprintf(stderr, "Inconsistent sizes of device_State unsupported\n");
-        exit(1);
-      }
+        auto &dss = DeviceInfo.deviceStateStore[device_id];
+        if (dss.first.get() == nullptr) {
+          assert(dss.second == 0);
+          void *ptr = NULL;
+          atmi_status_t err = atmi_calloc(&ptr, device_State_bytes,
+                                          get_gpu_mem_place(device_id));
+          if (err != ATMI_STATUS_SUCCESS) {
+            DP("Failed to allocate device_state array\n");
+            return NULL;
+          }
+          dss = {
+              std::unique_ptr<void, RTLDeviceInfoTy::atmiFreePtrDeletor>{ptr},
+              device_State_bytes,
+          };
+        }
 
-      // write ptr to device memory so it can be used by later kernels
-      err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr,
-                                                 sizeof(void *), device_id);
-      if (err != ATMI_STATUS_SUCCESS) {
-        fprintf(stderr, "memcpy install of state_ptr failed\n");
-        return NULL;
+        void *ptr = dss.first.get();
+        if (device_State_bytes != dss.second) {
+          DP("Inconsistent sizes of device_State unsupported\n");
+          return NULL;
+        }
+
+        // write ptr to device memory so it can be used by later kernels
+        err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr,
+                                                   sizeof(void *), device_id);
+        if (err != ATMI_STATUS_SUCCESS) {
+          DP("memcpy install of state_ptr failed\n");
+          return NULL;
+        }
       }
     }
   }
@@ -1289,9 +1292,8 @@
           get_gpu_mem_place(device_id), e->name, &varptr, &varsize);
 
       if (err != ATMI_STATUS_SUCCESS) {
-        DP("Loading global '%s' (Failed)\n", e->name);
         // Inform the user what symbol prevented offloading
-        fprintf(stderr, "Loading global '%s' (Failed)\n", e->name);
+        DP("Loading global '%s' (Failed)\n", e->name);
         return NULL;
       }