[compiler-rt] [scudo] Use -mcrc32 on x86 when available

Update the hardware CRC32 logic in scudo to support using `-mcrc32`
instead of `-msse4.2`.  The CRC32 intrinsics use the former flag
in the newer compiler versions, e.g. in clang since 12fa608af44a.
With these versions of clang, passing `-msse4.2` is insufficient
to enable the instructions and causes build failures when `-march` does
not enable CRC32 implicitly:

    /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/scudo_crc32.cpp:20:10: error: always_inline function '_mm_crc32_u32' requires target feature 'crc32', but would be inlined into function 'computeHardwareCRC32' that is compiled without support for 'crc32'
      return CRC32_INTRINSIC(Crc, Data);
             ^
    /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/scudo_crc32.h:27:27: note: expanded from macro 'CRC32_INTRINSIC'
    #  define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
                              ^
    /var/tmp/portage/sys-libs/compiler-rt-sanitizers-14.0.0/work/compiler-rt/lib/scudo/../sanitizer_common/sanitizer_platform.h:132:36: note: expanded from macro 'FIRST_32_SECOND_64'
    #  define FIRST_32_SECOND_64(a, b) (a)
                                       ^
    1 error generated.

For backwards compatibility, use `-mcrc32` when available and fall back
to `-msse4.2`.  The `<smmintrin.h>` header remains in use as it still
works and is compatible with GCC, while clang's `<crc32intrin.h>`
is not.

Use __builtin_ia32*() rather than _mm_crc32*() when using `-mcrc32`
to preserve compatibility with GCC.  _mm_crc32*() are aliases
to __builtin_ia32*() in both compilers but GCC requires `-msse4.2`
for the former, while both use `-mcrc32` for the latter.

Originally reported in https://bugs.gentoo.org/835870.

Differential Revision: https://reviews.llvm.org/D122789

GitOrigin-RevId: fd1da784ac644492f8ca40064baf3ef360352f55
diff --git a/checksum.h b/checksum.h
index a63b1b4..0f787ce 100644
--- a/checksum.h
+++ b/checksum.h
@@ -12,12 +12,16 @@
 #include "internal_defs.h"
 
 // Hardware CRC32 is supported at compilation via the following:
-// - for i386 & x86_64: -msse4.2
+// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2)
 // - for ARM & AArch64: -march=armv8-a+crc or -mcrc
 // An additional check must be performed at runtime as well to make sure the
 // emitted instructions are valid on the target host.
 
-#ifdef __SSE4_2__
+#if defined(__CRC32__)
+// NB: clang has <crc32intrin.h> but GCC does not
+#include <smmintrin.h>
+#define CRC32_INTRINSIC FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di)
+#elif defined(__SSE4_2__)
 #include <smmintrin.h>
 #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
 #endif
diff --git a/chunk.h b/chunk.h
index 69b8e1b..0581420 100644
--- a/chunk.h
+++ b/chunk.h
@@ -25,7 +25,7 @@
   // as opposed to only for crc32_hw.cpp. This means that other hardware
   // specific instructions were likely emitted at other places, and as a result
   // there is no reason to not use it here.
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
   u32 Crc = static_cast<u32>(CRC32_INTRINSIC(Seed, Value));
   for (uptr I = 0; I < ArraySize; I++)
     Crc = static_cast<u32>(CRC32_INTRINSIC(Crc, Array[I]));
@@ -42,7 +42,7 @@
       Checksum = computeBSDChecksum(Checksum, Array[I]);
     return Checksum;
   }
-#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 }
 
 namespace Chunk {
diff --git a/crc32_hw.cpp b/crc32_hw.cpp
index 62841ba..d13c615 100644
--- a/crc32_hw.cpp
+++ b/crc32_hw.cpp
@@ -10,10 +10,10 @@
 
 namespace scudo {
 
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 u32 computeHardwareCRC32(u32 Crc, uptr Data) {
   return static_cast<u32>(CRC32_INTRINSIC(Crc, Data));
 }
-#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 
 } // namespace scudo