Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
For more details about BF16 intrinsic, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
Patch by LiuTianle
Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, spatel, RKSimon
Reviewed By: craig.topper
Subscribers: mgorny, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60552
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@360018 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/docs/ClangCommandLineReference.rst b/docs/ClangCommandLineReference.rst
index e852c3e..f153a0c 100644
--- a/docs/ClangCommandLineReference.rst
+++ b/docs/ClangCommandLineReference.rst
@@ -2610,6 +2610,8 @@
.. option:: -mavx512bitalg, -mno-avx512bitalg
+.. option:: -mavx512bf16, -mno-avx512bf16
+
.. option:: -mavx512bw, -mno-avx512bw
.. option:: -mavx512cd, -mno-avx512cd
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 5cbab8a..6eef695 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1831,6 +1831,24 @@
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:",
+ "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256, "V8sV8f", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512, "V16sV16f", "ncV:512:",
+ "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:",
+ "avx512bf16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:",
+ "avx512bf16")
// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index 6d4aa1b..a293a39 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -2854,6 +2854,8 @@
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
+def mavx512bf16 : Flag<["-"], "mavx512bf16">, Group<m_x86_Features_Group>;
+def mno_avx512bf16 : Flag<["-"], "mno-avx512bf16">, Group<m_x86_Features_Group>;
def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 2345d7f..b83c938 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -521,6 +521,7 @@
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
Features["avx512bitalg"] = Features["avx512vnni"] =
Features["avx512vbmi2"] = false;
+ Features["avx512bf16"] = false;
break;
}
}
@@ -652,16 +653,22 @@
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
+ Name == "avx512bf16" ||
Name == "avx512vnni" || Name == "avx512vbmi2") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
+ if (Name == "avx512bf16" && Enabled)
+ Features["avx512bw"] = Features["avx512vl"] = true;
// Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
Features["avx512vbmi"] = Features["avx512vbmi2"] =
+ Features["avx512bf16"] =
Features["avx512bitalg"] = false;
+ if (Name == "avx512vl" && !Enabled)
+ Features["avx512bf16"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
@@ -751,6 +758,8 @@
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512vnni") {
HasAVX512VNNI = true;
+ } else if (Feature == "+avx512bf16") {
+ HasAVX512BF16 = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
@@ -1141,6 +1150,8 @@
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512VNNI)
Builder.defineMacro("__AVX512VNNI__");
+ if (HasAVX512BF16)
+ Builder.defineMacro("__AVX512BF16__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
@@ -1305,6 +1316,7 @@
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
+ .Case("avx512bf16", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
@@ -1383,6 +1395,7 @@
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
+ .Case("avx512bf16", HasAVX512BF16)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 9f285d8..d7a87f8 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -68,6 +68,7 @@
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512VNNI = false;
+ bool HasAVX512BF16 = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5abb62c..14b8d0b 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -11851,6 +11851,14 @@
case X86::BI__builtin_ia32_cmpordsd:
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
+// AVX512 bf16 intrinsics
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+ Ops[2] = getMaskVecValue(*this, Ops[2],
+ Ops[0]->getType()->getVectorNumElements());
+ Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
+
case X86::BI__emul:
case X86::BI__emulu: {
llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index a921d99..bfcade4 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -6,6 +6,7 @@
armintr.h
arm64intr.h
avx2intrin.h
+ avx512bf16intrin.h
avx512bwintrin.h
avx512bitalgintrin.h
avx512vlbitalgintrin.h
@@ -21,6 +22,7 @@
avx512vbmivlintrin.h
avx512vbmi2intrin.h
avx512vlvbmi2intrin.h
+ avx512vlbf16intrin.h
avx512vlbwintrin.h
avx512vlcdintrin.h
avx512vldqintrin.h
diff --git a/lib/Headers/cpuid.h b/lib/Headers/cpuid.h
index 137b9bc..ffe638a 100644
--- a/lib/Headers/cpuid.h
+++ b/lib/Headers/cpuid.h
@@ -184,6 +184,9 @@
#define bit_PCONFIG 0x00040000
#define bit_IBT 0x00100000
+/* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_AVX512BF16 0x00000020
+
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 1fda86e..2bd79bd 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -181,6 +181,15 @@
#include <avx512pfintrin.h>
#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
+#include <avx512bf16intrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512BF16__))
+#include <avx512vlbf16intrin.h>
+#endif
+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 56ccaf9..e3a2cb2 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -50,9 +50,9 @@
// CHECK: use_before_def{{.*}} #7
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c
index ee2e6af..d925f68 100644
--- a/test/Driver/x86-target-features.c
+++ b/test/Driver/x86-target-features.c
@@ -178,3 +178,8 @@
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-invpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-INVPCID %s
// INVPCID: "-target-feature" "+invpcid"
// NO-INVPCID: "-target-feature" "-invpcid"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512BF16 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s
+// AVX512BF16: "-target-feature" "+avx512bf16"
+// NO-AVX512BF16: "-target-feature" "-avx512bf16"
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index 3e4ffe4..54f56a8 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -443,3 +443,18 @@
// RUN: %clang -target i386-unknown-unknown -march=atom -mrdpid -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDPID %s
// RDPID: #define __RDPID__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16 %s
+
+// AVX512BF16: #define __AVX512BF16__ 1
+// AVX512BF16: #define __AVX512BW__ 1
+// AVX512BF16: #define __AVX512VL__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
+
+// AVX512BF16_NOAVX512BW-NOT: #define __AVX512BF16__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
+
+// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1
+