# Ensure the compiler is a valid clang when building the GPU target: whenever
# the LLVM version is known, the C++ compiler must identify as Clang and report
# exactly the same major.minor.patch version.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
# The compiler version is expanded inside quotes so that an empty or undefined
# value still produces a well-formed condition (and therefore the diagnostic
# below) rather than an if() argument error.
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
   "${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
  message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler "
                      "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
                      "is not 'Clang ${req_ver}'.")
endif()
| |
# Device runtime translation units, all located under src/. The order below is
# the order in which the files are handed to the target.
set(src_files "")
foreach(src_basename IN ITEMS
    Allocator
    Configuration
    Debug
    Kernel
    LibC
    Mapping
    Misc
    Parallelism
    Profiling
    Reduction
    State
    Synchronization
    Tasking
    DeviceUtils
    Workshare)
  list(APPEND src_files "${CMAKE_CURRENT_SOURCE_DIR}/src/${src_basename}.cpp")
endforeach()
| |
# Flags used to compile every device runtime source. They are appended to
# compile_options, so anything already stored in that variable is kept.
list(APPEND compile_options
  -flto
  -fvisibility=hidden
  -nogpulib
  -nostdlibinc
  -fno-rtti
  -fno-exceptions
  -fconvergent-functions
  -Wno-unknown-cuda-version
)

# Pass an explicit --target only when a default target triple is configured.
if(LLVM_DEFAULT_TARGET_TRIPLE)
  list(APPEND compile_options --target=${LLVM_DEFAULT_TARGET_TRIPLE})
endif()
| |
# We disable the SLP vectorizer during the runtime optimization to avoid
# vectorized accesses to the shared state. Generally, those are "good" but
# the optimizer pipeline (esp. Attributor) does not fully support vectorized
# instructions yet and we end up missing out on way more important constant
# propagation. That said, we will run the vectorizer again after the runtime
# has been linked into the user program.
list(APPEND compile_options "SHELL:-mllvm -vectorize-slp=false")

# Pick the name suffix of the bitcode library and the target-specific flags
# from whichever of the default target triple or the compiler target names a
# known GPU architecture.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
  set(target_name "amdgpu")
  list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
  set(target_name "nvptx")
  list(APPEND compile_options --cuda-feature=+ptx63)
else()
  # Neither branch assigned target_name, so the library name built from it
  # below would silently lose its target suffix. Make that visible.
  message(WARNING "Unrecognized GPU target: LLVM_DEFAULT_TARGET_TRIPLE is "
                  "'${LLVM_DEFAULT_TARGET_TRIPLE}' and CMAKE_CXX_COMPILER_TARGET is "
                  "'${CMAKE_CXX_COMPILER_TARGET}'; expected a triple starting with "
                  "'amdgcn' or 'nvptx'. No target name or target-specific "
                  "compile options were selected for the device runtime.")
endif()
| |
# The device runtime is declared as an executable so that the link step can be
# used to merge all sources into a single bitcode file through the linker's
# LTO pass. The result is written next to the other libraries for the target
# triple, named libomptarget-<target_name>.bc, with no rpath entries.
add_executable(libompdevice ${src_files})
set_target_properties(libompdevice
  PROPERTIES
    LINKER_LANGUAGE CXX
    RUNTIME_OUTPUT_NAME "libomptarget-${target_name}.bc"
    RUNTIME_OUTPUT_DIRECTORY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}"
    BUILD_RPATH ""
    INSTALL_RPATH ""
)
| |
# When a `libc` target exists (the GPU C library was enabled for this build),
# tell the runtime sources so they use it instead.
if(TARGET libc)
  target_compile_definitions(libompdevice PRIVATE OMPTARGET_HAS_LIBC)
endif()
target_compile_definitions(libompdevice PRIVATE SHARED_SCRATCHPAD_SIZE=512)

# Headers come from this directory plus the sibling libc and offload trees.
target_include_directories(libompdevice
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/../../libc"
    "${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include"
)

target_compile_options(libompdevice PRIVATE ${compile_options})

# Relocatable LTO link without the standard libraries; the linker is asked to
# emit LLVM bitcode as its output.
target_link_options(libompdevice
  PRIVATE
    "-flto"
    "-r"
    "-nostdlib"
    "-Wl,--lto-emit-llvm"
)
if(LLVM_DEFAULT_TARGET_TRIPLE)
  target_link_options(libompdevice
    PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
endif()

# Install the bitcode file with explicit read/write permissions (no execute
# bits are requested even though the target is an executable).
install(
  TARGETS libompdevice
  PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
  DESTINATION ${OPENMP_INSTALL_LIBDIR}
)
| |
# Expose the bitcode file produced by libompdevice as an imported object
# library so that it can be placed into an archive.
add_library(ompdevice.all_objs OBJECT IMPORTED)
set_property(
  TARGET ompdevice.all_objs
  APPEND PROPERTY IMPORTED_OBJECTS
    "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/libomptarget-${target_name}.bc"
)

# Static library wrapping the imported bitcode object. The imported object
# carries no build rule of its own, so the archive is explicitly ordered after
# the libompdevice target that creates the file.
add_library(ompdevice STATIC)
target_link_libraries(ompdevice PRIVATE ompdevice.all_objs)
add_dependencies(ompdevice libompdevice)
set_target_properties(ompdevice
  PROPERTIES
    LINKER_LANGUAGE CXX
    ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}"
)
install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")