[OpenMP] Remove 'keep_alive' functionality from the device RTL

The OpenMP DeviceRTL uses a hacky workaround to keep certain runtime
calls alive: a '__keep_alive' function that prevents them from being
optimized out. We needed this hack because the 'OpenMPOpt' pass likes to
introduce new runtime calls into the TU. This then interacted badly with
the method of linking the bitcode file per-TU like we do with Nvidia.
The OpenMPOpt pass would then generate a runtime call to a function that
was never linked in.

This should not be a problem anymore because we unconditionally link in
the `libomptarget.devicertl.a` runtime library. Linking against it
should only extract symbols that are undefined. So, if we do end up with
an unresolved reference it will be resolved by the static library.

The downside to this is that if we are doing non-LTO NVPTX compilation
that introduces one of these calls it will be linked outside the module
and therefore incur the overhead of an external function call.
However, removing this flag should make optimizing things easier. We
will need to see if that performance is a problem.

Reviewed By: ye-luo

Differential Revision: https://reviews.llvm.org/D151324

GitOrigin-RevId: 349c0aacb38072a868421ce7c460514be57a3de7
diff --git a/libomptarget/DeviceRTL/CMakeLists.txt b/libomptarget/DeviceRTL/CMakeLists.txt
index 8419b87..7540a8c 100644
--- a/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/libomptarget/DeviceRTL/CMakeLists.txt
@@ -31,9 +31,8 @@
   find_program(PACKAGER_TOOL clang-offload-packager PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
   find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
   find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  find_program(EXTRACT_TOOL llvm-extract PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT EXTRACT_TOOL) OR (NOT PACKAGER_TOOL))
-    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL}, llvm-extract: ${EXTRACT_TOOL}, or clang-offload-packager: ${PACKAGER_TOOL}")
+  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT PACKAGER_TOOL))
+    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL}, or clang-offload-packager: ${PACKAGER_TOOL}")
     return()
   else()
     libomptarget_say("Building DeviceRTL. Using clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} and opt: ${OPT_TOOL}")
@@ -45,7 +44,6 @@
   set(PACKAGER_TOOL $<TARGET_FILE:clang-offload-packager>)
   set(LINK_TOOL $<TARGET_FILE:llvm-link>)
   set(OPT_TOOL $<TARGET_FILE:opt>)
-  set(EXTRACT_TOOL $<TARGET_FILE:llvm-extract>)
   libomptarget_say("Building DeviceRTL. Using clang from in-tree build")
 else()
   libomptarget_say("Not building DeviceRTL. No appropriate clang found")
@@ -114,7 +112,6 @@
 set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512)
 set(link_opt_flags  -O3        -openmp-opt-disable -attributor-enable=module)
 set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
-set(link_extract_flag --func='__keep_alive' --delete)
 
 # Prepend -I to each list element
 set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
@@ -220,18 +217,6 @@
   # Install bitcode library under the lib destination folder.
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      COMMAND ${EXTRACT_TOOL} ${link_extract_flag} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
-                      -o ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bclib_target_name}
-      COMMENT "Extracting LLVM bitcode ${bclib_name}"
-  )
-  if("${EXTRACT_TOOL}" STREQUAL "$<TARGET_FILE:llvm-extract>")
-    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
-      DEPENDS llvm-extract
-      APPEND)
-  endif()
-
   set(target_feature "")
   if("${target_triple}" STREQUAL "nvptx64-nvidia-cuda")
     set(target_feature "feature=+ptx61")
@@ -240,8 +225,8 @@
   # Package the bitcode in the bitcode and embed it in an ELF for the static library
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
       COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
-        "--image=file=${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name},${target_feature},triple=${target_triple},arch=${target_cpu},kind=openmp"
-      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
+        "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},${target_feature},triple=${target_triple},arch=${target_cpu},kind=openmp"
+      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
       COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
   )
   if("${PACKAGER_TOOL}" STREQUAL "$<TARGET_FILE:clang-offload-packager>")
diff --git a/libomptarget/DeviceRTL/src/Utils.cpp b/libomptarget/DeviceRTL/src/Utils.cpp
index 41a919d..d74f7e0 100644
--- a/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/libomptarget/DeviceRTL/src/Utils.cpp
@@ -21,16 +21,6 @@
 
 extern "C" __attribute__((weak)) int IsSPMDMode;
 
-/// Helper to keep code alive without introducing a performance penalty.
-extern "C" __attribute__((weak, optnone, cold, used, retain)) void
-__keep_alive() {
-  __kmpc_get_hardware_thread_id_in_block();
-  __kmpc_get_hardware_num_threads_in_block();
-  __kmpc_get_warp_size();
-  __kmpc_barrier_simple_spmd(nullptr, IsSPMDMode);
-  __kmpc_barrier_simple_generic(nullptr, IsSPMDMode);
-}
-
 namespace impl {
 
 bool isSharedMemPtr(const void *Ptr) { return false; }
diff --git a/libomptarget/DeviceRTL/src/exports b/libomptarget/DeviceRTL/src/exports
index 0a23157..85fd459 100644
--- a/libomptarget/DeviceRTL/src/exports
+++ b/libomptarget/DeviceRTL/src/exports
@@ -4,7 +4,6 @@
 
 _ZN4ompx*
 
-__keep_alive
 IsSPMDMode
 
 memcmp