[OpenMP] Disable early vectorization of loads/stores in the runtime
We are having a hard time optimizing some vectorized loads/stores later
on, which causes this optimization to degrade performance.
Differential Revision: https://reviews.llvm.org/D158656
GitOrigin-RevId: 80906ce48d5bb28e06d0a113e7d62eb1358c8ecc
diff --git a/libomptarget/DeviceRTL/CMakeLists.txt b/libomptarget/DeviceRTL/CMakeLists.txt
index 9615657..47cb2fd 100644
--- a/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/libomptarget/DeviceRTL/CMakeLists.txt
@@ -109,8 +109,14 @@
${source_directory}/Workshare.cpp
)
-set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512)
-set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module)
+# We disable the SLP vectorizer during the runtime optimization to avoid
+# vectorized accesses to the shared state. Generally, those are "good" but
+# the optimizer pipeline (esp. Attributor) does not fully support vectorized
+# instructions yet and we end up missing out on way more important constant
+# propagation. That said, we will run the vectorizer again after the runtime
+# has been linked into the user program.
+set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false )
+set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
# Prepend -I to each list element