[X86] Always define the tzcnt intrinsics even when _MSC_VER is defined.

These intrinsics use llvm.cttz intrinsics so are always available
even without the bmi feature. We already don't check for the bmi
feature on the intrinsics themselves. But we were blocking the
include of the header file with _MSC_VER unless BMI was enabled
on the command line.

Fixes PR30506.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374516 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h
index b7af62f..841bd84 100644
--- a/lib/Headers/bmiintrin.h
+++ b/lib/Headers/bmiintrin.h
@@ -14,27 +14,13 @@
 #ifndef __BMIINTRIN_H
 #define __BMIINTRIN_H
 
-#define _tzcnt_u16(a)     (__tzcnt_u16((a)))
-
-#define _andn_u32(a, b)   (__andn_u32((a), (b)))
-
-/* _bextr_u32 != __bextr_u32 */
-#define _blsi_u32(a)      (__blsi_u32((a)))
-
-#define _blsmsk_u32(a)    (__blsmsk_u32((a)))
-
-#define _blsr_u32(a)      (__blsr_u32((a)))
-
-#define _tzcnt_u32(a)     (__tzcnt_u32((a)))
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
-
 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
    instruction behaves as BSF on non-BMI targets, there is code that expects
    to use it as a potentially faster version of BSF. */
 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 
+#define _tzcnt_u16(a)     (__tzcnt_u16((a)))
+
 /// Counts the number of trailing zero bits in the operand.
 ///
 /// \headerfile <x86intrin.h>
@@ -51,6 +37,94 @@
   return __builtin_ia32_tzcnt_u16(__X);
 }
 
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+///    bits in the operand.
+static __inline__ unsigned int __RELAXED_FN_ATTRS
+__tzcnt_u32(unsigned int __X)
+{
+  return __builtin_ia32_tzcnt_u32(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An 32-bit integer containing the number of trailing zero bits in
+///    the operand.
+static __inline__ int __RELAXED_FN_ATTRS
+_mm_tzcnt_32(unsigned int __X)
+{
+  return __builtin_ia32_tzcnt_u32(__X);
+}
+
+#define _tzcnt_u32(a)     (__tzcnt_u32((a)))
+
+#ifdef __x86_64__
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+///    bits in the operand.
+static __inline__ unsigned long long __RELAXED_FN_ATTRS
+__tzcnt_u64(unsigned long long __X)
+{
+  return __builtin_ia32_tzcnt_u64(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An 64-bit integer containing the number of trailing zero bits in
+///    the operand.
+static __inline__ long long __RELAXED_FN_ATTRS
+_mm_tzcnt_64(unsigned long long __X)
+{
+  return __builtin_ia32_tzcnt_u64(__X);
+}
+
+#define _tzcnt_u64(a)     (__tzcnt_u64((a)))
+
+#endif /* __x86_64__ */
+
+#undef __RELAXED_FN_ATTRS
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+
+#define _andn_u32(a, b)   (__andn_u32((a), (b)))
+
+/* _bextr_u32 != __bextr_u32 */
+#define _blsi_u32(a)      (__blsi_u32((a)))
+
+#define _blsmsk_u32(a)    (__blsmsk_u32((a)))
+
+#define _blsr_u32(a)      (__blsr_u32((a)))
+
 /// Performs a bitwise AND of the second operand with the one's
 ///    complement of the first operand.
 ///
@@ -169,38 +243,6 @@
   return __X & (__X - 1);
 }
 
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 32-bit integer containing the number of trailing zero
-///    bits in the operand.
-static __inline__ unsigned int __RELAXED_FN_ATTRS
-__tzcnt_u32(unsigned int __X)
-{
-  return __builtin_ia32_tzcnt_u32(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero bits in
-///    the operand.
-static __inline__ int __RELAXED_FN_ATTRS
-_mm_tzcnt_32(unsigned int __X)
-{
-  return __builtin_ia32_tzcnt_u32(__X);
-}
-
 #ifdef __x86_64__
 
 #define _andn_u64(a, b)   (__andn_u64((a), (b)))
@@ -212,8 +254,6 @@
 
 #define _blsr_u64(a)      (__blsr_u64((a)))
 
-#define _tzcnt_u64(a)     (__tzcnt_u64((a)))
-
 /// Performs a bitwise AND of the second operand with the one's
 ///    complement of the first operand.
 ///
@@ -332,41 +372,10 @@
   return __X & (__X - 1);
 }
 
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 64-bit integer containing the number of trailing zero
-///    bits in the operand.
-static __inline__ unsigned long long __RELAXED_FN_ATTRS
-__tzcnt_u64(unsigned long long __X)
-{
-  return __builtin_ia32_tzcnt_u64(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An 64-bit integer containing the number of trailing zero bits in
-///    the operand.
-static __inline__ long long __RELAXED_FN_ATTRS
-_mm_tzcnt_64(unsigned long long __X)
-{
-  return __builtin_ia32_tzcnt_u64(__X);
-}
-
 #endif /* __x86_64__ */
 
 #undef __DEFAULT_FN_ATTRS
-#undef __RELAXED_FN_ATTRS
+
+#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */
 
 #endif /* __BMIINTRIN_H */
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 7555ad8..ae900ee 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -64,9 +64,8 @@
 #include <vpclmulqdqintrin.h>
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
+/* No feature check desired due to internal checks */
 #include <bmiintrin.h>
-#endif
 
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
 #include <bmi2intrin.h>
diff --git a/test/CodeGen/bmi-builtins.c b/test/CodeGen/bmi-builtins.c
index 9eda3f6..9f2d776 100644
--- a/test/CodeGen/bmi-builtins.c
+++ b/test/CodeGen/bmi-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK_TZCNT
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=CHECK-TZCNT
 
 
 #include <immintrin.h>
@@ -13,12 +14,57 @@
 // instruction is identical in hardware, the AMD and Intel
 // intrinsics are different!
 
+unsigned short test_tzcnt_u16(unsigned short __X) {
+  // CHECK-TZCNT-LABEL: test_tzcnt_u16
+  // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
+  return _tzcnt_u16(__X);
+}
+
 unsigned short test__tzcnt_u16(unsigned short __X) {
-  // CHECK-LABEL: test__tzcnt_u16
-  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
+  // CHECK-TZCNT-LABEL: test__tzcnt_u16
+  // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
   return __tzcnt_u16(__X);
 }
 
+unsigned int test__tzcnt_u32(unsigned int __X) {
+  // CHECK-TZCNT-LABEL: test__tzcnt_u32
+  // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
+  return __tzcnt_u32(__X);
+}
+
+int test_mm_tzcnt_32(unsigned int __X) {
+  // CHECK-TZCNT-LABEL: test_mm_tzcnt_32
+  // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
+  return _mm_tzcnt_32(__X);
+}
+
+unsigned int test_tzcnt_u32(unsigned int __X) {
+  // CHECK-TZCNT-LABEL: test_tzcnt_u32
+  // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
+  return _tzcnt_u32(__X);
+}
+
+#ifdef __x86_64__
+unsigned long long test__tzcnt_u64(unsigned long long __X) {
+  // CHECK-TZCNT-LABEL: test__tzcnt_u64
+  // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+  return __tzcnt_u64(__X);
+}
+
+long long test_mm_tzcnt_64(unsigned long long __X) {
+  // CHECK-TZCNT-LABEL: test_mm_tzcnt_64
+  // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+  return _mm_tzcnt_64(__X);
+}
+
+unsigned long long test_tzcnt_u64(unsigned long long __X) {
+  // CHECK-TZCNT-LABEL: test_tzcnt_u64
+  // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+  return _tzcnt_u64(__X);
+}
+#endif
+
+#if !defined(TEST_TZCNT)
 unsigned int test__andn_u32(unsigned int __X, unsigned int __Y) {
   // CHECK-LABEL: test__andn_u32
   // CHECK: xor i32 %{{.*}}, -1
@@ -53,18 +99,6 @@
   return __blsr_u32(__X);
 }
 
-unsigned int test__tzcnt_u32(unsigned int __X) {
-  // CHECK-LABEL: test__tzcnt_u32
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
-  return __tzcnt_u32(__X);
-}
-
-int test_mm_tzcnt_32(unsigned int __X) {
-  // CHECK-LABEL: test_mm_tzcnt_32
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
-  return _mm_tzcnt_32(__X);
-}
-
 #ifdef __x86_64__
 unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) {
   // CHECK-LABEL: test__andn_u64
@@ -99,28 +133,10 @@
   // CHECK: and i64 %{{.*}}, %{{.*}}
   return __blsr_u64(__X);
 }
-
-unsigned long long test__tzcnt_u64(unsigned long long __X) {
-  // CHECK-LABEL: test__tzcnt_u64
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
-  return __tzcnt_u64(__X);
-}
-
-long long test_mm_tzcnt_64(unsigned long long __X) {
-  // CHECK-LABEL: test_mm_tzcnt_64
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
-  return _mm_tzcnt_64(__X);
-}
 #endif
 
 // Intel intrinsics
 
-unsigned short test_tzcnt_u16(unsigned short __X) {
-  // CHECK-LABEL: test_tzcnt_u16
-  // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
-  return _tzcnt_u16(__X);
-}
-
 unsigned int test_andn_u32(unsigned int __X, unsigned int __Y) {
   // CHECK-LABEL: test_andn_u32
   // CHECK: xor i32 %{{.*}}, -1
@@ -160,12 +176,6 @@
   return _blsr_u32(__X);
 }
 
-unsigned int test_tzcnt_u32(unsigned int __X) {
-  // CHECK-LABEL: test_tzcnt_u32
-  // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
-  return _tzcnt_u32(__X);
-}
-
 #ifdef __x86_64__
 unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) {
   // CHECK-LABEL: test_andn_u64
@@ -206,10 +216,6 @@
   // CHECK: and i64 %{{.*}}, %{{.*}}
   return _blsr_u64(__X);
 }
-
-unsigned long long test_tzcnt_u64(unsigned long long __X) {
-  // CHECK-LABEL: test_tzcnt_u64
-  // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
-  return _tzcnt_u64(__X);
-}
 #endif
+
+#endif // !defined(TEST_TZCNT)