[OpenMP] Added the support for cache line size 256 for A64FX
The Fugaku supercomputer is built with the Fujitsu A64FX microprocessor, whose cache line size is 256 bytes. Currently, libomp only supports a cache line size of 128 bytes for PPC64 and 64 bytes for everything else. This patch adds support for a cache line size of 256 bytes for A64FX. It's worth noting that although A64FX is a variant of AArch64, this property is not shared by other AArch64 processors. As a result, following the UCX source code (https://github.com/openucx/ucx/blob/392443ab92626412605dee1572056f79c897c6c3/src/ucs/arch/aarch64/cpu.c#L17), the only way to determine the cache line size is to check whether the CPU is a FUJITSU A64FX.
Reviewed By: jdoerfert, Hahnfeld
Differential Revision: https://reviews.llvm.org/D93169
GitOrigin-RevId: 676c7cb0c0d4b66affde3ff7fc566c7a5aaa7246
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index e24528e..6d8a539 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -66,7 +66,18 @@
endif ()
set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
endif()
-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 mic mips mips64 riscv64)
+
+# The FUJITSU A64FX has a cache line size of 256, unlike other AArch64 CPUs,
+# so it is tracked as its own architecture and forwarded to kmp_config.h.
+if(LIBOMP_ARCH STREQUAL "aarch64")
+  libomp_is_aarch64_a64fx(LIBOMP_DETECT_AARCH64_A64FX)
+  if(LIBOMP_DETECT_AARCH64_A64FX)
+    set(LIBOMP_ARCH_AARCH64_A64FX TRUE)
+    set(LIBOMP_ARCH "aarch64_a64fx")
+  endif()
+endif()
+
+libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64)
set(LIBOMP_LIB_TYPE normal CACHE STRING
"Performance,Profiling,Stubs library (normal/profile/stubs)")
@@ -136,6 +147,7 @@
set(INTEL64 FALSE)
set(ARM FALSE)
set(AARCH64 FALSE)
+set(AARCH64_A64FX FALSE)
set(PPC64BE FALSE)
set(PPC64LE FALSE)
set(PPC64 FALSE)
@@ -157,6 +169,8 @@
set(PPC64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64") # AARCH64 architecture
set(AARCH64 TRUE)
+elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_a64fx") # AARCH64_A64FX architecture
+ set(AARCH64_A64FX TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mic") # Intel(R) Many Integrated Core Architecture
set(MIC TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture
diff --git a/runtime/cmake/LibompGetArchitecture.cmake b/runtime/cmake/LibompGetArchitecture.cmake
index 897f99a..45c2f27 100644
--- a/runtime/cmake/LibompGetArchitecture.cmake
+++ b/runtime/cmake/LibompGetArchitecture.cmake
@@ -69,3 +69,26 @@
# Remove ${detect_arch_src_txt} from cmake/ subdirectory
file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/libomp_detect_arch.c")
endfunction()
+
+# Detects whether CMake is running on a FUJITSU A64FX processor.
+# Sets the variable named by return_is_aarch64_a64fx, in the caller's scope, to
+# TRUE when /proc/cpuinfo reports CPU implementer 0x46 and CPU architecture 8,
+# and to FALSE otherwise (including when /proc/cpuinfo does not exist).
+# NOTE(review): the CPU part number is not checked, so any implementer-0x46
+# ARMv8 CPU is reported as A64FX.
+function(libomp_is_aarch64_a64fx return_is_aarch64_a64fx)
+  set(is_aarch64_a64fx FALSE)
+  # file(READ) is a hard configure error on a missing file, so hosts without
+  # /proc/cpuinfo (non-Linux) must skip the probe.
+  if(EXISTS "/proc/cpuinfo")
+    file(READ "/proc/cpuinfo" cpu_info_content)
+    # Quoted so an empty file still supplies an input argument to string().
+    string(REGEX MATCH "CPU implementer[ \t]*: 0x46\n" cpu_implementer "${cpu_info_content}")
+    string(REGEX MATCH "CPU architecture[ \t]*: 8\n" cpu_architecture "${cpu_info_content}")
+    if(cpu_architecture AND cpu_implementer)
+      set(is_aarch64_a64fx TRUE)
+    endif()
+  endif()
+
+  set(${return_is_aarch64_a64fx} "${is_aarch64_a64fx}" PARENT_SCOPE)
+endfunction()
diff --git a/runtime/cmake/LibompUtils.cmake b/runtime/cmake/LibompUtils.cmake
index 44d2363..b1de242 100644
--- a/runtime/cmake/LibompUtils.cmake
+++ b/runtime/cmake/LibompUtils.cmake
@@ -101,6 +101,8 @@
set(${return_arch_string} "PPC64LE" PARENT_SCOPE)
elseif(${AARCH64})
set(${return_arch_string} "AARCH64" PARENT_SCOPE)
+ elseif(${AARCH64_A64FX})
+ set(${return_arch_string} "AARCH64_A64FX" PARENT_SCOPE)
elseif(${MIPS})
set(${return_arch_string} "MIPS" PARENT_SCOPE)
elseif(${MIPS64})
diff --git a/runtime/cmake/config-ix.cmake b/runtime/cmake/config-ix.cmake
index 7dcd68e..f06fda6 100644
--- a/runtime/cmake/config-ix.cmake
+++ b/runtime/cmake/config-ix.cmake
@@ -291,6 +291,7 @@
(LIBOMP_ARCH STREQUAL i386) OR
# (LIBOMP_ARCH STREQUAL arm) OR
(LIBOMP_ARCH STREQUAL aarch64) OR
+ (LIBOMP_ARCH STREQUAL aarch64_a64fx) OR
(LIBOMP_ARCH STREQUAL ppc64le) OR
(LIBOMP_ARCH STREQUAL ppc64) OR
(LIBOMP_ARCH STREQUAL riscv64))
diff --git a/runtime/src/kmp_config.h.cmake b/runtime/src/kmp_config.h.cmake
index 877a1e3..4010a11 100644
--- a/runtime/src/kmp_config.h.cmake
+++ b/runtime/src/kmp_config.h.cmake
@@ -82,10 +82,14 @@
#define KMP_HAVE_ATTRIBUTE_WAITPKG LIBOMP_HAVE_ATTRIBUTE_WAITPKG
#cmakedefine01 LIBOMP_HAVE_ATTRIBUTE_RTM
#define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM
+#cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX
+#define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX
// Configured cache line based on architecture
#if KMP_ARCH_PPC64
# define CACHE_LINE 128
+#elif KMP_ARCH_AARCH64_A64FX
+# define CACHE_LINE 256
#else
# define CACHE_LINE 64
#endif