[lld-macho] Parallelize UUID hash computation

This reuses the approach (and some code) from LLD-ELF.

It's a decent win when linking chromium_framework on a Mac Pro (3.2 GHz 16-Core Intel Xeon W):

      N           Min           Max        Median           Avg        Stddev
  x  20          4.58          4.83          4.66        4.6685   0.066591844
  +  20          4.42          4.61           4.5         4.505    0.04751731
  Difference at 95.0% confidence
          -0.1635 +/- 0.0370242
          -3.5022% +/- 0.793064%
          (Student's t, pooled s = 0.0578462)

The output binary is 381MB.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D99279

GitOrigin-RevId: 9b6dde8af8f0880690792e3faa7987a8529232f6
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 5f5f7cc..2301a06 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -19,6 +19,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
+#include "lld/Common/Arrays.h"
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
@@ -2972,19 +2973,6 @@
       sec->writeTo<ELFT>(Out::bufferStart + sec->offset);
 }
 
-// Split one uint8 array into small pieces of uint8 arrays.
-static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> arr,
-                                            size_t chunkSize) {
-  std::vector<ArrayRef<uint8_t>> ret;
-  while (arr.size() > chunkSize) {
-    ret.push_back(arr.take_front(chunkSize));
-    arr = arr.drop_front(chunkSize);
-  }
-  if (!arr.empty())
-    ret.push_back(arr);
-  return ret;
-}
-
 // Computes a hash value of Data using a given hash function.
 // In order to utilize multiple cores, we first split data into 1MB
 // chunks, compute a hash for each chunk, and then compute a hash value
diff --git a/MachO/Writer.cpp b/MachO/Writer.cpp
index aa85aa3..a3b307a 100644
--- a/MachO/Writer.cpp
+++ b/MachO/Writer.cpp
@@ -20,12 +20,14 @@
 #include "Target.h"
 #include "UnwindInfoSection.h"
 
+#include "lld/Common/Arrays.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
@@ -920,10 +922,21 @@
       osec->writeTo(buf + osec->fileOff);
 }
 
+// In order to utilize multiple cores, we first split the buffer into chunks,
+// compute a hash for each chunk, and then compute a hash value of the hash
+// values.
 void Writer::writeUuid() {
   TimeTraceScope timeScope("Computing UUID");
-  uint64_t digest =
-      xxHash64({buffer->getBufferStart(), buffer->getBufferEnd()});
+  ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
+  unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
+  // Round-up integer division
+  size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
+  std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
+  std::vector<uint64_t> hashes(chunks.size());
+  parallelForEachN(0, chunks.size(),
+                   [&](size_t i) { hashes[i] = xxHash64(chunks[i]); });
+  uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
+                              hashes.size() * sizeof(uint64_t)});
   uuidCommand->writeUuid(digest);
 }
 
diff --git a/include/lld/Common/Arrays.h b/include/lld/Common/Arrays.h
new file mode 100644
index 0000000..b4c25ec
--- /dev/null
+++ b/include/lld/Common/Arrays.h
@@ -0,0 +1,32 @@
+//===- Arrays.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ARRAYS_H
+#define LLD_ARRAYS_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+#include <vector>
+
+namespace lld {
+// Split one uint8 array into small pieces of uint8 arrays.
+inline std::vector<llvm::ArrayRef<uint8_t>> split(llvm::ArrayRef<uint8_t> arr,
+                                                  size_t chunkSize) {
+  std::vector<llvm::ArrayRef<uint8_t>> ret;
+  while (arr.size() > chunkSize) {
+    ret.push_back(arr.take_front(chunkSize));
+    arr = arr.drop_front(chunkSize);
+  }
+  if (!arr.empty())
+    ret.push_back(arr);
+  return ret;
+}
+
+} // namespace lld
+
+#endif