[ARM,CDE] Generalize MVE intrinsics infrastructure to support CDE

Summary:
This patch generalizes the existing code to support CDE intrinsics
which will share some properties with existing MVE intrinsics
(some of the intrinsics will be polymorphic and accept/return values
of MVE vector types).
Specifically the patch:
* Adds new tablegen backends -gen-arm-cde-builtin-def,
  -gen-arm-cde-builtin-codegen, -gen-arm-cde-builtin-sema,
  -gen-arm-cde-builtin-aliases, -gen-arm-cde-builtin-header based on
  existing MVE backends.
* Renames the '__clang_arm_mve_alias' attribute to
  '__clang_arm_builtin_alias' (it will be used with CDE intrinsics as
  well as MVE intrinsics).
* Implements semantic checks for the coprocessor argument of the CDE
  intrinsics as well as the existing coprocessor intrinsics.
* Adds one CDE intrinsic, __arm_cx1, to test the above changes.

Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen

Reviewed By: simon_tatham

Subscribers: sdesmalen, mgorny, kristof.beyls, danielkiss, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75850

GitOrigin-RevId: 47edf5bafb8ede52dca836eac770efffbf657d30
diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
index a5b0532..b18cfef 100644
--- a/include/clang/Basic/Attr.td
+++ b/include/clang/Basic/Attr.td
@@ -622,11 +622,11 @@
   let Documentation = [Undocumented];
 }
 
-def ArmMveAlias : InheritableAttr, TargetSpecificAttr<TargetARM> {
-  let Spellings = [Clang<"__clang_arm_mve_alias">];
+def ArmBuiltinAlias : InheritableAttr, TargetSpecificAttr<TargetARM> {
+  let Spellings = [Clang<"__clang_arm_builtin_alias">];
   let Args = [IdentifierArgument<"BuiltinName">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
-  let Documentation = [ArmMveAliasDocs];
+  let Documentation = [ArmBuiltinAliasDocs];
 }
 
 def Aligned : InheritableAttr {
diff --git a/include/clang/Basic/AttrDocs.td b/include/clang/Basic/AttrDocs.td
index cc9d3c8..aea5749 100644
--- a/include/clang/Basic/AttrDocs.td
+++ b/include/clang/Basic/AttrDocs.td
@@ -4669,11 +4669,11 @@
 }];
 }
 
-def ArmMveAliasDocs : Documentation {
+def ArmBuiltinAliasDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-This attribute is used in the implementation of the ACLE intrinsics
-for the Arm MVE instruction set. It allows the intrinsic functions to
+This attribute is used in the implementation of the ACLE intrinsics.
+It allows the intrinsic functions to
 be declared using the names defined in ACLE, and still be recognized
 as clang builtins equivalent to the underlying name. For example,
 ``arm_mve.h`` declares the function ``vaddq_u32`` with
@@ -4684,8 +4684,8 @@
 of which builtin to identify the function as can be deferred until
 after overload resolution.
 
-This attribute can only be used to set up the aliases for the MVE
-intrinsic functions; it is intended for use only inside ``arm_mve.h``,
+This attribute can only be used to set up the aliases for certain Arm
+intrinsic functions; it is intended for use only inside ``arm_*.h``
 and is not a general mechanism for declaring arbitrary aliases for
 clang builtin functions.
   }];
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def
index 848abb4..be20c24 100644
--- a/include/clang/Basic/BuiltinsARM.def
+++ b/include/clang/Basic/BuiltinsARM.def
@@ -202,6 +202,8 @@
 // aren't included from both here and BuiltinsAArch64.def.)
 #include "clang/Basic/arm_mve_builtins.inc"
 
+#include "clang/Basic/arm_cde_builtins.inc"
+
 // MSVC
 LANGBUILTIN(__emit, "vIUiC", "", ALL_MS_LANGUAGES)
 
diff --git a/include/clang/Basic/CMakeLists.txt b/include/clang/Basic/CMakeLists.txt
index 5912034..ea011a8 100644
--- a/include/clang/Basic/CMakeLists.txt
+++ b/include/clang/Basic/CMakeLists.txt
@@ -60,3 +60,16 @@
 clang_tablegen(arm_mve_builtin_aliases.inc -gen-arm-mve-builtin-aliases
   SOURCE arm_mve.td
   TARGET ClangARMMveBuiltinAliases)
+
+clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinsDef)
+clang_tablegen(arm_cde_builtin_cg.inc -gen-arm-cde-builtin-codegen
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinCG)
+clang_tablegen(arm_cde_builtin_sema.inc -gen-arm-cde-builtin-sema
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinSema)
+clang_tablegen(arm_cde_builtin_aliases.inc -gen-arm-cde-builtin-aliases
+  SOURCE arm_cde.td
+  TARGET ClangARMCdeBuiltinAliases)
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index f4ed6b2..d8b4351 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -639,6 +639,8 @@
 def err_builtin_definition : Error<"definition of builtin function %0">;
 def err_builtin_redeclare : Error<"cannot redeclare builtin function %0">;
 def err_arm_invalid_specialreg : Error<"invalid special register for builtin">;
+def err_arm_invalid_coproc : Error<"coprocessor %0 must be configured as "
+  "%select{GCP|CDE}1">;
 def err_invalid_cpu_supports : Error<"invalid cpu feature string for builtin">;
 def err_invalid_cpu_is : Error<"invalid cpu name for builtin">;
 def err_invalid_cpu_specific_dispatch_value : Error<
@@ -6722,8 +6724,8 @@
   InGroup<DiagGroup<"objc-unsafe-perform-selector">>;
 def note_objc_unsafe_perform_selector_method_declared_here :  Note<
   "method %0 that returns %1 declared here">;
-def err_attribute_arm_mve_alias : Error<
-  "'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE builtin">;
+def err_attribute_arm_builtin_alias : Error<
+  "'__clang_arm_builtin_alias' attribute can only be applied to an ARM builtin">;
 def err_attribute_arm_mve_polymorphism : Error<
   "'__clang_arm_mve_strict_polymorphism' attribute can only be applied to an MVE/NEON vector type">;
 
diff --git a/include/clang/Basic/arm_cde.td b/include/clang/Basic/arm_cde.td
new file mode 100644
index 0000000..222b639
--- /dev/null
+++ b/include/clang/Basic/arm_cde.td
@@ -0,0 +1,45 @@
+//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the set of ACLE-specified source-level intrinsic
+// functions wrapping the CDE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_mve_defs.td"
+
+class CDEIntrinsic<Type ret, dag args, dag codegen>
+  : Intrinsic<ret, args, codegen> {
+  let builtinExtension = "cde";
+}
+
+// Immediate (in range [0, 2^numBits - 1])
+class IB_ConstBits<int numBits> : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;
+// numBits-wide immediate of type u32
+class CDEImmediateBits<int numBits> : Immediate<u32, IB_ConstBits<numBits>>;
+
+// LLVM IR CDE intrinsic
+class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
+      : IRIntBase<"arm_cde_" # name, params, appendKind>;
+
+// Coprocessor immediate
+def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;
+
+// Immediate integer parameters
+def imm_3b : CDEImmediateBits<3>;
+def imm_4b : CDEImmediateBits<4>;
+def imm_6b :  CDEImmediateBits<6>;
+def imm_7b :  CDEImmediateBits<7>;
+def imm_9b :  CDEImmediateBits<9>;
+def imm_11b : CDEImmediateBits<11>;
+def imm_12b : CDEImmediateBits<12>;
+def imm_13b : CDEImmediateBits<13>;
+
+let pnt = PNT_None, params = T.None in
+def cx1 : CDEIntrinsic<u32, (args imm_coproc:$cp, imm_13b:$imm),
+                            (CDEIRInt<"cx1"> $cp, $imm)>;
diff --git a/include/clang/Basic/arm_mve_defs.td b/include/clang/Basic/arm_mve_defs.td
index daf7387..f1424f2 100644
--- a/include/clang/Basic/arm_mve_defs.td
+++ b/include/clang/Basic/arm_mve_defs.td
@@ -476,9 +476,16 @@
   // True if the builtin has to avoid evaluating its arguments.
   bit nonEvaluating = 0;
 
+  // True if the intrinsic needs only the C header part (no codegen, semantic
+  // checks, etc). Used for redeclaring MVE intrinsics in the arm_cde.h header.
+  bit headerOnly = 0;
+
   // Use to override the suffix letter to make e.g.vfooq_p16
   // with an override suffix letter of "p".
   string overrideKindLetter = "";
+
+  // Name of the architecture extension, used in the Clang builtin name
+  string builtinExtension = "mve";
 }
 
 // Sometimes you have to use two separate Intrinsic declarations to
@@ -553,6 +560,7 @@
 // instead of having to repeat a long list every time.
 
 def T {
+  list<Type> None = [Void];
   list<Type> Signed = [s8, s16, s32];
   list<Type> Unsigned = [u8, u16, u32];
   list<Type> Int = Signed # Unsigned;
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 2304a97..60a4f2e 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -11764,6 +11764,8 @@
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckCDEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckARMCoprocessorImmediate(const Expr *CoprocArg, bool WantCDE);
   bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp
index 7625acd..5440544 100644
--- a/lib/AST/Decl.cpp
+++ b/lib/AST/Decl.cpp
@@ -3148,8 +3148,8 @@
 unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
   unsigned BuiltinID;
 
-  if (const auto *AMAA = getAttr<ArmMveAliasAttr>()) {
-    BuiltinID = AMAA->getBuiltinName()->getBuiltinID();
+  if (const auto *ABAA = getAttr<ArmBuiltinAliasAttr>()) {
+    BuiltinID = ABAA->getBuiltinName()->getBuiltinID();
   } else {
     if (!getIdentifier())
       return 0;
@@ -3181,7 +3181,7 @@
   // If the function is marked "overloadable", it has a different mangled name
   // and is not the C library function.
   if (!ConsiderWrapperFunctions && hasAttr<OverloadableAttr>() &&
-      !hasAttr<ArmMveAliasAttr>())
+      !hasAttr<ArmBuiltinAliasAttr>())
     return 0;
 
   if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5a1fb5d..090efaa 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -6538,6 +6538,9 @@
   // Deal with MVE builtins
   if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
     return Result;
+  // Handle CDE builtins
+  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+    return Result;
 
   // Find out if any arguments are required to be integer constant
   // expressions.
@@ -7212,6 +7215,17 @@
   llvm_unreachable("unknown custom codegen type.");
 }
 
+Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
+                                              const CallExpr *E,
+                                              ReturnValueSlot ReturnValue,
+                                              llvm::Triple::ArchType Arch) {
+  switch (BuiltinID) {
+  default:
+    return nullptr;
+#include "clang/Basic/arm_cde_builtin_cg.inc"
+  }
+}
+
 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E,
                                       SmallVectorImpl<Value *> &Ops,
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 907b4d7..f82e489 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3873,6 +3873,9 @@
   llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                      ReturnValueSlot ReturnValue,
                                      llvm::Triple::ArchType Arch);
+  llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                     ReturnValueSlot ReturnValue,
+                                     llvm::Triple::ArchType Arch);
 
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
                                          unsigned LLVMIntrinsic,
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index f172d7a..9237843 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -186,6 +186,8 @@
 clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
 # Generate arm_mve.h
 clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
+# Generate arm_cde.h
+clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
 
 add_custom_target(clang-resource-headers ALL DEPENDS ${out_files})
 set_target_properties(clang-resource-headers PROPERTIES
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 2e73fca..24d0d92 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -2070,6 +2070,44 @@
   }
 }
 
+bool Sema::CheckCDEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  bool Err = false;
+  switch (BuiltinID) {
+  default:
+    return false;
+#include "clang/Basic/arm_cde_builtin_sema.inc"
+  }
+
+  if (Err)
+    return true;
+
+  return CheckARMCoprocessorImmediate(TheCall->getArg(0), /*WantCDE*/ true);
+}
+
+// Diagnoses a coprocessor immediate whose CDE configuration does not match
+// WantCDE; returns false if it matches or the argument cannot be checked.
+bool Sema::CheckARMCoprocessorImmediate(const Expr *CoprocArg, bool WantCDE) {
+  // Skip constant-evaluated contexts and dependent arguments (no value yet).
+  if (isConstantEvaluated() || CoprocArg->isTypeDependent() ||
+      CoprocArg->isValueDependent())
+    return false;
+
+  llvm::APSInt CoprocNoAP;
+  bool IsICE = CoprocArg->isIntegerConstantExpr(CoprocNoAP, Context);
+  (void)IsICE; // Only read by the assert; avoids an unused warning in NDEBUG.
+  assert(IsICE && "Coprocessor immediate is not a constant expression");
+  int64_t CoprocNo = CoprocNoAP.getExtValue();
+  assert(CoprocNo >= 0 && "Coprocessor immediate must be non-negative");
+  uint32_t CDECoprocMask = Context.getTargetInfo().getARMCDECoprocMask();
+  bool IsCDECoproc = CoprocNo <= 7 && (CDECoprocMask & (1 << CoprocNo));
+
+  if (IsCDECoproc != WantCDE)
+    return Diag(CoprocArg->getBeginLoc(), diag::err_arm_invalid_coproc)
+           << (int)CoprocNo << (int)WantCDE << CoprocArg->getSourceRange();
+
+  return false;
+}
+
 bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                         unsigned MaxWidth) {
   assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
@@ -2212,6 +2250,8 @@
     return true;
   if (CheckMVEBuiltinFunctionCall(BuiltinID, TheCall))
     return true;
+  if (CheckCDEBuiltinFunctionCall(BuiltinID, TheCall))
+    return true;
 
   // For intrinsics which take an immediate value as part of the instruction,
   // range check them here.
@@ -2234,6 +2274,26 @@
   case ARM::BI__builtin_arm_isb:
   case ARM::BI__builtin_arm_dbg:
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15);
+  case ARM::BI__builtin_arm_cdp:
+  case ARM::BI__builtin_arm_cdp2:
+  case ARM::BI__builtin_arm_mcr:
+  case ARM::BI__builtin_arm_mcr2:
+  case ARM::BI__builtin_arm_mrc:
+  case ARM::BI__builtin_arm_mrc2:
+  case ARM::BI__builtin_arm_mcrr:
+  case ARM::BI__builtin_arm_mcrr2:
+  case ARM::BI__builtin_arm_mrrc:
+  case ARM::BI__builtin_arm_mrrc2:
+  case ARM::BI__builtin_arm_ldc:
+  case ARM::BI__builtin_arm_ldcl:
+  case ARM::BI__builtin_arm_ldc2:
+  case ARM::BI__builtin_arm_ldc2l:
+  case ARM::BI__builtin_arm_stc:
+  case ARM::BI__builtin_arm_stcl:
+  case ARM::BI__builtin_arm_stc2:
+  case ARM::BI__builtin_arm_stc2l:
+    return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15) ||
+           CheckARMCoprocessorImmediate(TheCall->getArg(0), /*WantCDE*/ false);
   }
 }
 
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 3a6c2af..2e1710f 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -4937,13 +4937,47 @@
                  PatchableFunctionEntryAttr(S.Context, AL, Count, Offset));
 }
 
-static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+namespace {
+struct IntrinToName {
+  uint32_t Id;
+  int32_t FullName;
+  int32_t ShortName;
+};
+} // unnamed namespace
+
+static bool ArmBuiltinAliasValid(unsigned BuiltinID, StringRef AliasName,
+                                 ArrayRef<IntrinToName> Map,
+                                 const char *IntrinNames) {
   if (AliasName.startswith("__arm_"))
     AliasName = AliasName.substr(6);
-#include "clang/Basic/arm_mve_builtin_aliases.inc"
+  const IntrinToName *It = std::lower_bound(
+      Map.begin(), Map.end(), BuiltinID,
+      [](const IntrinToName &L, unsigned Id) { return L.Id < Id; });
+  if (It == Map.end() || It->Id != BuiltinID)
+    return false;
+  StringRef FullName(&IntrinNames[It->FullName]);
+  if (AliasName == FullName)
+    return true;
+  if (It->ShortName == -1)
+    return false;
+  StringRef ShortName(&IntrinNames[It->ShortName]);
+  return AliasName == ShortName;
 }
 
-static void handleArmMveAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+#include "clang/Basic/arm_mve_builtin_aliases.inc"
+  // The included file defines:
+  // - ArrayRef<IntrinToName> Map
+  // - const char IntrinNames[]
+  return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
+}
+
+static bool ArmCdeAliasValid(unsigned BuiltinID, StringRef AliasName) {
+#include "clang/Basic/arm_cde_builtin_aliases.inc"
+  return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
+}
+
+static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (!AL.isArgIdent(0)) {
     S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
         << AL << 1 << AANT_ArgumentIdentifier;
@@ -4952,14 +4986,15 @@
 
   IdentifierInfo *Ident = AL.getArgAsIdent(0)->Ident;
   unsigned BuiltinID = Ident->getBuiltinID();
+  StringRef AliasName = cast<FunctionDecl>(D)->getIdentifier()->getName();
 
-  if (!ArmMveAliasValid(BuiltinID,
-                        cast<FunctionDecl>(D)->getIdentifier()->getName())) {
-    S.Diag(AL.getLoc(), diag::err_attribute_arm_mve_alias);
+  if (!ArmMveAliasValid(BuiltinID, AliasName) &&
+      !ArmCdeAliasValid(BuiltinID, AliasName)) {
+    S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
     return;
   }
 
-  D->addAttr(::new (S.Context) ArmMveAliasAttr(S.Context, AL, Ident));
+  D->addAttr(::new (S.Context) ArmBuiltinAliasAttr(S.Context, AL, Ident));
 }
 
 //===----------------------------------------------------------------------===//
@@ -7441,8 +7476,8 @@
     handleMSAllocatorAttr(S, D, AL);
     break;
 
-  case ParsedAttr::AT_ArmMveAlias:
-    handleArmMveAliasAttr(S, D, AL);
+  case ParsedAttr::AT_ArmBuiltinAlias:
+    handleArmBuiltinAliasAttr(S, D, AL);
     break;
 
   case ParsedAttr::AT_AcquireHandle:
diff --git a/test/CodeGen/arm-cde-gpr.c b/test/CodeGen/arm-cde-gpr.c
new file mode 100644
index 0000000..9a24b15
--- /dev/null
+++ b/test/CodeGen/arm-cde-gpr.c
@@ -0,0 +1,16 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi \
+// RUN:   -target-feature +cdecp0 -target-feature +cdecp1 \
+// RUN:   -mfloat-abi hard -O0 -disable-O0-optnone \
+// RUN:   -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_cde.h>
+
+// CHECK-LABEL: @test_cx1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.cde.cx1(i32 0, i32 123)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_cx1() {
+  return __arm_cx1(0, 123);
+}
diff --git a/test/Headers/arm-cde-header.c b/test/Headers/arm-cde-header.c
new file mode 100644
index 0000000..e9ff649
--- /dev/null
+++ b/test/Headers/arm-cde-header.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c89 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -std=c99 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -std=c11 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -fsyntax-only %s
+
+// Check that the headers don't conflict with each other
+#include <arm_cde.h>
+#include <arm_mve.h>
diff --git a/test/Misc/pragma-attribute-supported-attributes-list.test b/test/Misc/pragma-attribute-supported-attributes-list.test
index 3e29eb4..769da31 100644
--- a/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -18,7 +18,7 @@
 // CHECK-NEXT: Annotate ()
 // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface)
-// CHECK-NEXT: ArmMveAlias (SubjectMatchRule_function)
+// CHECK-NEXT: ArmBuiltinAlias (SubjectMatchRule_function)
 // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
 // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
diff --git a/test/Sema/arm-cde-immediates.c b/test/Sema/arm-cde-immediates.c
new file mode 100644
index 0000000..bbc1366
--- /dev/null
+++ b/test/Sema/arm-cde-immediates.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -target-feature +cdecp0 -verify -fsyntax-only %s
+
+#include <arm_cde.h>
+#include <arm_acle.h>
+
+void test_coproc_gcp_instr(int a) {
+  __builtin_arm_cdp(0, 2, 3, 4, 5, 6); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_cdp2(0, 2, 3, 4, 5, 6); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcr(0, 0, a, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcr2(0, 0, a, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrc(0, 0, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrc2(0, 0, 13, 0, 3); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcrr(0, 0, a, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mcrr2(0, 0, a, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrrc(0, 0, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_mrrc2(0, 0, 0); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldcl(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc2(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_ldc2l(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stcl(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc2(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+  __builtin_arm_stc2l(0, 2, &a); // expected-error {{coprocessor 0 must be configured as GCP}}
+}
+
+void test_coproc(uint32_t a) {
+  (void)__arm_cx1(0, 0);
+  __arm_cx1(a, 0); // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(-1, 0); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  __arm_cx1(8, 0);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  __arm_cx1(1, 0); // expected-error {{coprocessor 1 must be configured as CDE}}
+}
+
+void test_cx(uint32_t a) {
+  (void)__arm_cx1(0, 0);
+  __arm_cx1(a, 0); // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(0, a);  // expected-error {{argument to '__arm_cx1' must be a constant integer}}
+  __arm_cx1(0, 8192);  // expected-error {{argument value 8192 is outside the valid range [0, 8191]}}
+}
diff --git a/test/Sema/arm-mve-alias-attribute.c b/test/Sema/arm-mve-alias-attribute.c
index c2f526d..6741cae 100644
--- a/test/Sema/arm-mve-alias-attribute.c
+++ b/test/Sema/arm-mve-alias-attribute.c
@@ -1,22 +1,28 @@
 // RUN: %clang_cc1 -triple armv8.1m.main-arm-none-eabi -verify -fsyntax-only %s
 
-static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE builtin}}
-void nop(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_builtin_alias' attribute can only be applied to an ARM builtin}}
+void
+nop(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias)) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void noparens(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias)) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+noparens(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias())) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void emptyparens(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias())) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+emptyparens(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias("string literal"))) // expected-error {{'__clang_arm_mve_alias' attribute requires parameter 1 to be an identifier}}
-void stringliteral(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias("string literal"))) // expected-error {{'__clang_arm_builtin_alias' attribute requires parameter 1 to be an identifier}}
+void
+stringliteral(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias(1))) // expected-error {{'__clang_arm_mve_alias' attribute requires parameter 1 to be an identifier}}
-void integer(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(1))) // expected-error {{'__clang_arm_builtin_alias' attribute requires parameter 1 to be an identifier}}
+void
+integer(void);
 
-static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_nop, 2))) // expected-error {{'__clang_arm_mve_alias' attribute takes one argument}}
-void twoargs(void);
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop, 2))) // expected-error {{'__clang_arm_builtin_alias' attribute takes one argument}}
+void
+twoargs(void);
 
-static __attribute__((__clang_arm_mve_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_mve_alias' attribute only applies to functions}}
+static __attribute__((__clang_arm_builtin_alias(__builtin_arm_nop))) // expected-error {{'__clang_arm_builtin_alias' attribute only applies to functions}}
 int variable;
diff --git a/utils/TableGen/MveEmitter.cpp b/utils/TableGen/MveEmitter.cpp
index bc3c21f..9a9fe00 100644
--- a/utils/TableGen/MveEmitter.cpp
+++ b/utils/TableGen/MveEmitter.cpp
@@ -80,7 +80,7 @@
 
 namespace {
 
-class MveEmitter;
+class EmitterBase;
 class Result;
 
 // -----------------------------------------------------------------------------
@@ -140,6 +140,7 @@
   TypeKind typeKind() const { return TKind; }
   virtual ~Type() = default;
   virtual bool requiresFloat() const = 0;
+  virtual bool requiresMVE() const = 0;
   virtual unsigned sizeInBits() const = 0;
   virtual std::string cName() const = 0;
   virtual std::string llvmName() const {
@@ -179,6 +180,7 @@
   VoidType() : Type(TypeKind::Void) {}
   unsigned sizeInBits() const override { return 0; }
   bool requiresFloat() const override { return false; }
+  bool requiresMVE() const override { return false; }
   std::string cName() const override { return "void"; }
 
   static bool classof(const Type *T) { return T->typeKind() == TypeKind::Void; }
@@ -194,6 +196,7 @@
       : Type(TypeKind::Pointer), Pointee(Pointee), Const(Const) {}
   unsigned sizeInBits() const override { return 32; }
   bool requiresFloat() const override { return Pointee->requiresFloat(); }
+  bool requiresMVE() const override { return Pointee->requiresMVE(); }
   std::string cName() const override {
     std::string Name = Pointee->cName();
 
@@ -274,6 +277,7 @@
   }
   bool isInteger() const { return Kind != ScalarTypeKind::Float; }
   bool requiresFloat() const override { return !isInteger(); }
+  bool requiresMVE() const override { return false; }
   bool hasNonstandardName() const { return !NameOverride.empty(); }
 
   static bool classof(const Type *T) {
@@ -291,6 +295,7 @@
   unsigned sizeInBits() const override { return Lanes * Element->sizeInBits(); }
   unsigned lanes() const { return Lanes; }
   bool requiresFloat() const override { return Element->requiresFloat(); }
+  bool requiresMVE() const override { return true; }
   std::string cNameBase() const override {
     return Element->cNameBase() + "x" + utostr(Lanes);
   }
@@ -317,6 +322,7 @@
   }
   unsigned registers() const { return Registers; }
   bool requiresFloat() const override { return Element->requiresFloat(); }
+  bool requiresMVE() const override { return true; }
   std::string cNameBase() const override {
     return Element->cNameBase() + "x" + utostr(Registers);
   }
@@ -341,6 +347,7 @@
   unsigned sizeInBits() const override { return 16; }
   std::string cNameBase() const override { return "mve_pred16"; }
   bool requiresFloat() const override { return false; };
+  bool requiresMVE() const override { return true; }
   std::string llvmName() const override {
     // Use <4 x i1> instead of <2 x i1> for two-lane vector types. See
     // the comment in llvm/lib/Target/ARM/ARMInstrMVE.td for further
@@ -405,7 +412,7 @@
   // We rely on the recursive code generation working identically in passes 1
   // and 2, so that the same list of calls to allocParam happen in the same
   // order. That guarantees that the parameter numbers recorded in pass 1 will
-  // match the entries in this vector that store what MveEmitter::EmitBuiltinCG
+  // match the entries in this vector that store what EmitterBase::EmitBuiltinCG
   // decided to do about each one in pass 2.
   std::vector<int> *ParamNumberMap = nullptr;
 
@@ -794,6 +801,9 @@
   // shares with at least one other intrinsic.
   std::string ShortName, FullName;
 
+  // Name of the architecture extension, used in the Clang builtin name
+  StringRef BuiltinExtension;
+
   // A very small number of intrinsics _only_ have a polymorphic
   // variant (vuninitializedq taking an unevaluated argument).
   bool PolymorphicOnly;
@@ -802,6 +812,10 @@
   // evaluate its argument(s) at all.
   bool NonEvaluating;
 
+  // True if the intrinsic needs only the C header part (no codegen, semantic
+  // checks, etc). Used for redeclaring MVE intrinsics in the arm_cde.h header.
+  bool HeaderOnly;
+
   const Type *ReturnType;
   std::vector<const Type *> ArgTypes;
   std::map<unsigned, ImmediateArg> ImmediateArgs;
@@ -824,6 +838,7 @@
 public:
   const std::string &shortName() const { return ShortName; }
   const std::string &fullName() const { return FullName; }
+  StringRef builtinExtension() const { return BuiltinExtension; }
   const Type *returnType() const { return ReturnType; }
   const std::vector<const Type *> &argTypes() const { return ArgTypes; }
   bool requiresFloat() const {
@@ -834,13 +849,19 @@
         return true;
     return false;
   }
+  bool requiresMVE() const { // true if the return or any argument type does
+    return ReturnType->requiresMVE() ||
+           any_of(ArgTypes, [](const Type *T) { return T->requiresMVE(); });
+  }
   bool polymorphic() const { return ShortName != FullName; }
   bool polymorphicOnly() const { return PolymorphicOnly; }
   bool nonEvaluating() const { return NonEvaluating; }
+  bool headerOnly() const { return HeaderOnly; }
 
-  // External entry point for code generation, called from MveEmitter.
+  // External entry point for code generation, called from EmitterBase.
   void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc,
                unsigned Pass) const {
+    assert(!headerOnly() && "Called genCode for header-only intrinsic");
     if (!hasCode()) {
       for (auto kv : CustomCodeGenArgs)
         OS << "  " << kv.first << " = " << kv.second << ";\n";
@@ -881,6 +902,7 @@
   }
 
   std::string genSema() const {
+    assert(!headerOnly() && "Called genSema for header-only intrinsic");
     std::vector<std::string> SemaChecks;
 
     for (const auto &kv : ImmediateArgs) {
@@ -932,22 +954,21 @@
     }
     if (SemaChecks.empty())
       return "";
-    return (Twine("  return ") +
-            join(std::begin(SemaChecks), std::end(SemaChecks),
-                 " ||\n         ") +
-            ";\n")
-        .str();
+    return join(std::begin(SemaChecks), std::end(SemaChecks),
+                " ||\n         ") +
+           ";\n";
   }
 
-  ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param);
+  ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param);
 };
 
 // -----------------------------------------------------------------------------
 // The top-level class that holds all the state from analyzing the entire
 // Tablegen input.
 
-class MveEmitter {
-  // MveEmitter holds a collection of all the types we've instantiated.
+class EmitterBase {
+protected:
+  // EmitterBase holds a collection of all the types we've instantiated.
   VoidType Void;
   std::map<std::string, std::unique_ptr<ScalarType>> ScalarTypes;
   std::map<std::tuple<ScalarTypeKind, unsigned, unsigned>,
@@ -1022,18 +1043,21 @@
   Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote,
                             bool Immediate);
 
+  void GroupSemaChecks(std::map<std::string, std::set<std::string>> &Checks);
+
   // Constructor and top-level functions.
 
-  MveEmitter(RecordKeeper &Records);
+  EmitterBase(RecordKeeper &Records);
+  virtual ~EmitterBase() = default;
 
-  void EmitHeader(raw_ostream &OS);
-  void EmitBuiltinDef(raw_ostream &OS);
-  void EmitBuiltinSema(raw_ostream &OS);
+  virtual void EmitHeader(raw_ostream &OS) = 0;
+  virtual void EmitBuiltinDef(raw_ostream &OS) = 0;
+  virtual void EmitBuiltinSema(raw_ostream &OS) = 0;
   void EmitBuiltinCG(raw_ostream &OS);
   void EmitBuiltinAliases(raw_ostream &OS);
 };
 
-const Type *MveEmitter::getType(Init *I, const Type *Param) {
+const Type *EmitterBase::getType(Init *I, const Type *Param) {
   if (auto Dag = dyn_cast<DagInit>(I))
     return getType(Dag, Param);
   if (auto Def = dyn_cast<DefInit>(I))
@@ -1042,7 +1066,7 @@
   PrintFatalError("Could not convert this value into a type");
 }
 
-const Type *MveEmitter::getType(Record *R, const Type *Param) {
+const Type *EmitterBase::getType(Record *R, const Type *Param) {
   // Pass to a subfield of any wrapper records. We don't expect more than one
   // of these: immediate operands are used as plain numbers rather than as
   // llvm::Value, so it's meaningless to promote their type anyway.
@@ -1061,7 +1085,7 @@
   PrintFatalError(R->getLoc(), "Could not convert this record into a type");
 }
 
-const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
+const Type *EmitterBase::getType(DagInit *D, const Type *Param) {
   // The meat of the getType system: types in the Tablegen are represented by a
   // dag whose operators select sub-cases of this function.
 
@@ -1129,8 +1153,8 @@
   PrintFatalError("Bad operator in type dag expression");
 }
 
-Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
-                                      const Type *Param) {
+Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope,
+                                       const Type *Param) {
   Record *Op = cast<DefInit>(D->getOperator())->getDef();
 
   if (Op->getName() == "seq") {
@@ -1232,9 +1256,9 @@
   }
 }
 
-Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
-                                         const Result::Scope &Scope,
-                                         const Type *Param) {
+Result::Ptr EmitterBase::getCodeForDagArg(DagInit *D, unsigned ArgNum,
+                                          const Result::Scope &Scope,
+                                          const Type *Param) {
   Init *Arg = D->getArg(ArgNum);
   StringRef Name = D->getArgNameStr(ArgNum);
 
@@ -1266,8 +1290,8 @@
   PrintFatalError("bad dag argument type for code generation");
 }
 
-Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
-                                      bool Promote, bool Immediate) {
+Result::Ptr EmitterBase::getCodeForArg(unsigned ArgNum, const Type *ArgType,
+                                       bool Promote, bool Immediate) {
   Result::Ptr V = std::make_shared<BuiltinArgResult>(
       ArgNum, isa<PointerType>(ArgType), Immediate);
 
@@ -1286,7 +1310,7 @@
   return V;
 }
 
-ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
+ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param)
     : ReturnType(ME.getType(R->getValueAsDef("ret"), Param)) {
   // Derive the intrinsic's full name, by taking the name of the
   // Tablegen record (or override) and appending the suffix from its
@@ -1325,8 +1349,11 @@
   }
   ShortName = join(std::begin(NameParts), std::end(NameParts), "_");
 
+  BuiltinExtension = R->getValueAsString("builtinExtension");
+
   PolymorphicOnly = R->getValueAsBit("polymorphicOnly");
   NonEvaluating = R->getValueAsBit("nonEvaluating");
+  HeaderOnly = R->getValueAsBit("headerOnly");
 
   // Process the intrinsic's argument list.
   DagInit *ArgsDag = R->getValueAsDag("args");
@@ -1419,8 +1446,8 @@
   }
 }
 
-MveEmitter::MveEmitter(RecordKeeper &Records) {
-  // Construct the whole MveEmitter.
+EmitterBase::EmitterBase(RecordKeeper &Records) {
+  // Construct the whole EmitterBase.
 
   // First, look up all the instances of PrimitiveType. This gives us the list
   // of vector typedefs we have to put in arm_mve.h, and also allows us to
@@ -1460,243 +1487,18 @@
       : string_holder(), raw_string_ostream(S) {}
 };
 
-void MveEmitter::EmitHeader(raw_ostream &OS) {
-  // Accumulate pieces of the header file that will be enabled under various
-  // different combinations of #ifdef. The index into parts[] is made up of
-  // the following bit flags.
-  constexpr unsigned Float = 1;
-  constexpr unsigned UseUserNamespace = 2;
-
-  constexpr unsigned NumParts = 4;
-  raw_self_contained_string_ostream parts[NumParts];
-
-  // Write typedefs for all the required vector types, and a few scalar
-  // types that don't already have the name we want them to have.
-
-  parts[0] << "typedef uint16_t mve_pred16_t;\n";
-  parts[Float] << "typedef __fp16 float16_t;\n"
-                  "typedef float float32_t;\n";
-  for (const auto &kv : ScalarTypes) {
-    const ScalarType *ST = kv.second.get();
-    if (ST->hasNonstandardName())
-      continue;
-    raw_ostream &OS = parts[ST->requiresFloat() ? Float : 0];
-    const VectorType *VT = getVectorType(ST);
-
-    OS << "typedef __attribute__((__neon_vector_type__(" << VT->lanes()
-       << "), __clang_arm_mve_strict_polymorphism)) " << ST->cName() << " "
-       << VT->cName() << ";\n";
-
-    // Every vector type also comes with a pair of multi-vector types for
-    // the VLD2 and VLD4 instructions.
-    for (unsigned n = 2; n <= 4; n += 2) {
-      const MultiVectorType *MT = getMultiVectorType(n, VT);
-      OS << "typedef struct { " << VT->cName() << " val[" << n << "]; } "
-         << MT->cName() << ";\n";
-    }
-  }
-  parts[0] << "\n";
-  parts[Float] << "\n";
-
-  // Write declarations for all the intrinsics.
-
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
-
-    // We generate each intrinsic twice, under its full unambiguous
-    // name and its shorter polymorphic name (if the latter exists).
-    for (bool Polymorphic : {false, true}) {
-      if (Polymorphic && !Int.polymorphic())
-        continue;
-      if (!Polymorphic && Int.polymorphicOnly())
-        continue;
-
-      // We also generate each intrinsic under a name like __arm_vfooq
-      // (which is in C language implementation namespace, so it's
-      // safe to define in any conforming user program) and a shorter
-      // one like vfooq (which is in user namespace, so a user might
-      // reasonably have used it for something already). If so, they
-      // can #define __ARM_MVE_PRESERVE_USER_NAMESPACE before
-      // including the header, which will suppress the shorter names
-      // and leave only the implementation-namespace ones. Then they
-      // have to write __arm_vfooq everywhere, of course.
-
-      for (bool UserNamespace : {false, true}) {
-        raw_ostream &OS = parts[(Int.requiresFloat() ? Float : 0) |
-                                (UserNamespace ? UseUserNamespace : 0)];
-
-        // Make the name of the function in this declaration.
-
-        std::string FunctionName =
-            Polymorphic ? Int.shortName() : Int.fullName();
-        if (!UserNamespace)
-          FunctionName = "__arm_" + FunctionName;
-
-        // Make strings for the types involved in the function's
-        // prototype.
-
-        std::string RetTypeName = Int.returnType()->cName();
-        if (!StringRef(RetTypeName).endswith("*"))
-          RetTypeName += " ";
-
-        std::vector<std::string> ArgTypeNames;
-        for (const Type *ArgTypePtr : Int.argTypes())
-          ArgTypeNames.push_back(ArgTypePtr->cName());
-        std::string ArgTypesString =
-            join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
-
-        // Emit the actual declaration. All these functions are
-        // declared 'static inline' without a body, which is fine
-        // provided clang recognizes them as builtins, and has the
-        // effect that this type signature is used in place of the one
-        // that Builtins.def didn't provide. That's how we can get
-        // structure types that weren't defined until this header was
-        // included to be part of the type signature of a builtin that
-        // was known to clang already.
-        //
-        // The declarations use __attribute__(__clang_arm_mve_alias),
-        // so that each function declared will be recognized as the
-        // appropriate MVE builtin in spite of its user-facing name.
-        //
-        // (That's better than making them all wrapper functions,
-        // partly because it avoids any compiler error message citing
-        // the wrapper function definition instead of the user's code,
-        // and mostly because some MVE intrinsics have arguments
-        // required to be compile-time constants, and that property
-        // can't be propagated through a wrapper function. It can be
-        // propagated through a macro, but macros can't be overloaded
-        // on argument types very easily - you have to use _Generic,
-        // which makes error messages very confusing when the user
-        // gets it wrong.)
-        //
-        // Finally, the polymorphic versions of the intrinsics are
-        // also defined with __attribute__(overloadable), so that when
-        // the same name is defined with several type signatures, the
-        // right thing happens. Each one of the overloaded
-        // declarations is given a different builtin id, which
-        // has exactly the effect we want: first clang resolves the
-        // overload to the right function, then it knows which builtin
-        // it's referring to, and then the Sema checking for that
-        // builtin can check further things like the constant
-        // arguments.
-        //
-        // One more subtlety is the newline just before the return
-        // type name. That's a cosmetic tweak to make the error
-        // messages legible if the user gets the types wrong in a call
-        // to a polymorphic function: this way, clang will print just
-        // the _final_ line of each declaration in the header, to show
-        // the type signatures that would have been legal. So all the
-        // confusing machinery with __attribute__ is left out of the
-        // error message, and the user sees something that's more or
-        // less self-documenting: "here's a list of actually readable
-        // type signatures for vfooq(), and here's why each one didn't
-        // match your call".
-
-        OS << "static __inline__ __attribute__(("
-           << (Polymorphic ? "overloadable, " : "")
-           << "__clang_arm_mve_alias(__builtin_arm_mve_" << Int.fullName()
-           << ")))\n"
-           << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
-      }
-    }
-  }
-  for (auto &part : parts)
-    part << "\n";
-
-  // Now we've finished accumulating bits and pieces into the parts[] array.
-  // Put it all together to write the final output file.
-
-  OS << "/*===---- arm_mve.h - ARM MVE intrinsics "
-        "-----------------------------------===\n"
-        " *\n"
-        " *\n"
-        " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
-        "Exceptions.\n"
-        " * See https://llvm.org/LICENSE.txt for license information.\n"
-        " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
-        " *\n"
-        " *===-------------------------------------------------------------"
-        "----"
-        "------===\n"
-        " */\n"
-        "\n"
-        "#ifndef __ARM_MVE_H\n"
-        "#define __ARM_MVE_H\n"
-        "\n"
-        "#if !__ARM_FEATURE_MVE\n"
-        "#error \"MVE support not enabled\"\n"
-        "#endif\n"
-        "\n"
-        "#include <stdint.h>\n"
-        "\n"
-        "#ifdef __cplusplus\n"
-        "extern \"C\" {\n"
-        "#endif\n"
-        "\n";
-
-  for (size_t i = 0; i < NumParts; ++i) {
-    std::vector<std::string> conditions;
-    if (i & Float)
-      conditions.push_back("(__ARM_FEATURE_MVE & 2)");
-    if (i & UseUserNamespace)
-      conditions.push_back("(!defined __ARM_MVE_PRESERVE_USER_NAMESPACE)");
-
-    std::string condition =
-        join(std::begin(conditions), std::end(conditions), " && ");
-    if (!condition.empty())
-      OS << "#if " << condition << "\n\n";
-    OS << parts[i].str();
-    if (!condition.empty())
-      OS << "#endif /* " << condition << " */\n\n";
-  }
-
-  OS << "#ifdef __cplusplus\n"
-        "} /* extern \"C\" */\n"
-        "#endif\n"
-        "\n"
-        "#endif /* __ARM_MVE_H */\n";
-}
-
-void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
-    OS << "TARGET_HEADER_BUILTIN(__builtin_arm_mve_" << Int.fullName()
-       << ", \"\", \"n\", \"arm_mve.h\", ALL_LANGUAGES, \"\")\n";
-  }
-
-  std::set<std::string> ShortNamesSeen;
-
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
-    if (Int.polymorphic()) {
-      StringRef Name = Int.shortName();
-      if (ShortNamesSeen.find(std::string(Name)) == ShortNamesSeen.end()) {
-        OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt";
-        if (Int.nonEvaluating())
-          OS << "u"; // indicate that this builtin doesn't evaluate its args
-        OS << "\")\n";
-        ShortNamesSeen.insert(std::string(Name));
-      }
-    }
-  }
-}
-
-void MveEmitter::EmitBuiltinSema(raw_ostream &OS) {
-  std::map<std::string, std::set<std::string>> Checks;
-
-  for (const auto &kv : ACLEIntrinsics) {
-    const ACLEIntrinsic &Int = *kv.second;
-    std::string Check = Int.genSema();
-    if (!Check.empty())
-      Checks[Check].insert(Int.fullName());
-  }
-
-  for (const auto &kv : Checks) {
-    for (StringRef Name : kv.second)
-      OS << "case ARM::BI__builtin_arm_mve_" << Name << ":\n";
-    OS << kv.first;
-  }
-}
+const char LLVMLicenseHeader[] =
+    " *\n"
+    " *\n"
+    " * Part of the LLVM Project, under the Apache License v2.0 with LLVM"
+    " Exceptions.\n"
+    " * See https://llvm.org/LICENSE.txt for license information.\n"
+    " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+    " *\n"
+    " *===-----------------------------------------------------------------"
+    "------===\n"
+    " */\n"
+    "\n"; // banner tail streamed by both the arm_mve.h and arm_cde.h emitters
 
 // Machinery for the grouping of intrinsics by similar codegen.
 //
@@ -1742,7 +1544,7 @@
   }
 };
 
-void MveEmitter::EmitBuiltinCG(raw_ostream &OS) {
+void EmitterBase::EmitBuiltinCG(raw_ostream &OS) {
   // Pass 1: generate code for all the intrinsics as if every type or constant
   // that can possibly be abstracted out into a parameter variable will be.
   // This identifies the sets of intrinsics we'll group together into a single
@@ -1752,6 +1554,8 @@
 
   for (const auto &kv : ACLEIntrinsics) {
     const ACLEIntrinsic &Int = *kv.second;
+    if (Int.headerOnly())
+      continue;
 
     MergeableGroup MG;
     OutputIntrinsic OI;
@@ -1848,7 +1652,9 @@
     // brace.
     const char *prefix = "";
     for (const auto &OI : kv.second) {
-      OS << prefix << "case ARM::BI__builtin_arm_mve_" << OI.Name << ":";
+      OS << prefix << "case ARM::BI__builtin_arm_" << OI.Int->builtinExtension()
+         << "_" << OI.Name << ":";
+
       prefix = "\n";
     }
     OS << " {\n";
@@ -1867,7 +1673,8 @@
       // individual intrinsic's values.
       OS << "  switch (BuiltinID) {\n";
       for (const auto &OI : kv.second) {
-        OS << "  case ARM::BI__builtin_arm_mve_" << OI.Name << ":\n";
+        OS << "  case ARM::BI__builtin_arm_" << OI.Int->builtinExtension()
+           << "_" << OI.Name << ":\n";
         for (size_t i = 0, e = MG.ParamTypes.size(); i < e; ++i)
           OS << "    Param" << utostr(i) << " = " << OI.ParamValues[i] << ";\n";
         OS << "    break;\n";
@@ -1882,53 +1689,436 @@
   }
 }
 
-void MveEmitter::EmitBuiltinAliases(raw_ostream &OS) {
+void EmitterBase::EmitBuiltinAliases(raw_ostream &OS) {
   // Build a sorted table of:
   // - intrinsic id number
   // - full name
   // - polymorphic name or -1
   StringToOffsetTable StringTable;
-  OS << "struct IntrinToName {\n"
-        "  uint32_t Id;\n"
-        "  int32_t FullName;\n"
-        "  int32_t ShortName;\n"
-        "};\n";
-  OS << "static const IntrinToName Map[] = {\n";
+  OS << "static const IntrinToName MapData[] = {\n";
   for (const auto &kv : ACLEIntrinsics) {
     const ACLEIntrinsic &Int = *kv.second;
+    if (Int.headerOnly())
+      continue;
     int32_t ShortNameOffset =
         Int.polymorphic() ? StringTable.GetOrAddStringOffset(Int.shortName())
                           : -1;
-    OS << "  { ARM::BI__builtin_arm_mve_" << Int.fullName() << ", "
+    OS << "  { ARM::BI__builtin_arm_" << Int.builtinExtension() << "_"
+       << Int.fullName() << ", "
        << StringTable.GetOrAddStringOffset(Int.fullName()) << ", "
        << ShortNameOffset << "},\n";
   }
   OS << "};\n\n";
 
+  OS << "ArrayRef<IntrinToName> Map(MapData);\n\n";
+
   OS << "static const char IntrinNames[] = {\n";
   StringTable.EmitString(OS);
   OS << "};\n\n";
+}
 
-  OS << "auto It = std::lower_bound(std::begin(Map), "
-        "std::end(Map), BuiltinID,\n"
-        "  [](const IntrinToName &L, unsigned Id) {\n"
-        "    return L.Id < Id;\n"
-        "  });\n";
-  OS << "if (It == std::end(Map) || It->Id != BuiltinID)\n"
-        "  return false;\n";
-  OS << "StringRef FullName(&IntrinNames[It->FullName]);\n";
-  OS << "if (AliasName == FullName)\n"
-        "  return true;\n";
-  OS << "if (It->ShortName == -1)\n"
-        "  return false;\n";
-  OS << "StringRef ShortName(&IntrinNames[It->ShortName]);\n";
-  OS << "return AliasName == ShortName;\n";
+void EmitterBase::GroupSemaChecks(
+    std::map<std::string, std::set<std::string>> &Checks) {
+  // Bucket intrinsic full names by the text of their generated Sema check.
+  for (const auto &Entry : ACLEIntrinsics) {
+    const ACLEIntrinsic &Intrinsic = *Entry.second;
+    // genSema() asserts on header-only intrinsics, so treat them as empty.
+    std::string SemaText = Intrinsic.headerOnly() ? "" : Intrinsic.genSema();
+    if (!SemaText.empty())
+      Checks[SemaText].insert(Intrinsic.fullName());
+  }
+}
+
+// -----------------------------------------------------------------------------
+// The class used for generating arm_mve.h and related Clang bits
+//
+
+class MveEmitter : public EmitterBase {
+public:
+  MveEmitter(RecordKeeper &Records) : EmitterBase(Records) {}
+  void EmitHeader(raw_ostream &OS) override;      // writes arm_mve.h
+  void EmitBuiltinDef(raw_ostream &OS) override;  // writes builtin .def entries
+  void EmitBuiltinSema(raw_ostream &OS) override; // writes Sema case labels
+};
+
+void MveEmitter::EmitHeader(raw_ostream &OS) {
+  // Accumulate pieces of the header file that will be enabled under various
+  // different combinations of #ifdef. The index into parts[] is made up of
+  // the following bit flags.
+  constexpr unsigned Float = 1; // guarded by (__ARM_FEATURE_MVE & 2)
+  constexpr unsigned UseUserNamespace = 2; // names without the __arm_ prefix
+
+  constexpr unsigned NumParts = 4; // one part per combination of the flags
+  raw_self_contained_string_ostream parts[NumParts];
+
+  // Write typedefs for all the required vector types, and a few scalar
+  // types that don't already have the name we want them to have.
+
+  parts[0] << "typedef uint16_t mve_pred16_t;\n";
+  parts[Float] << "typedef __fp16 float16_t;\n"
+                  "typedef float float32_t;\n";
+  for (const auto &kv : ScalarTypes) {
+    const ScalarType *ST = kv.second.get();
+    if (ST->hasNonstandardName())
+      continue;
+    raw_ostream &OS = parts[ST->requiresFloat() ? Float : 0];
+    const VectorType *VT = getVectorType(ST);
+
+    OS << "typedef __attribute__((__neon_vector_type__(" << VT->lanes()
+       << "), __clang_arm_mve_strict_polymorphism)) " << ST->cName() << " "
+       << VT->cName() << ";\n";
+
+    // Every vector type also comes with a pair of multi-vector types for
+    // the VLD2 and VLD4 instructions.
+    for (unsigned n = 2; n <= 4; n += 2) { // n takes the values 2 and 4
+      const MultiVectorType *MT = getMultiVectorType(n, VT);
+      OS << "typedef struct { " << VT->cName() << " val[" << n << "]; } "
+         << MT->cName() << ";\n";
+    }
+  }
+  parts[0] << "\n";
+  parts[Float] << "\n";
+
+  // Write declarations for all the intrinsics.
+
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+
+    // We generate each intrinsic twice, under its full unambiguous
+    // name and its shorter polymorphic name (if the latter exists).
+    for (bool Polymorphic : {false, true}) {
+      if (Polymorphic && !Int.polymorphic())
+        continue;
+      if (!Polymorphic && Int.polymorphicOnly())
+        continue;
+
+      // We also generate each intrinsic under a name like __arm_vfooq
+      // (which is in C language implementation namespace, so it's
+      // safe to define in any conforming user program) and a shorter
+      // one like vfooq (which is in user namespace, so a user might
+      // reasonably have used it for something already). If so, they
+      // can #define __ARM_MVE_PRESERVE_USER_NAMESPACE before
+      // including the header, which will suppress the shorter names
+      // and leave only the implementation-namespace ones. Then they
+      // have to write __arm_vfooq everywhere, of course.
+
+      for (bool UserNamespace : {false, true}) {
+        raw_ostream &OS = parts[(Int.requiresFloat() ? Float : 0) |
+                                (UserNamespace ? UseUserNamespace : 0)];
+
+        // Make the name of the function in this declaration.
+
+        std::string FunctionName =
+            Polymorphic ? Int.shortName() : Int.fullName();
+        if (!UserNamespace)
+          FunctionName = "__arm_" + FunctionName;
+
+        // Make strings for the types involved in the function's
+        // prototype.
+
+        std::string RetTypeName = Int.returnType()->cName();
+        if (!StringRef(RetTypeName).endswith("*"))
+          RetTypeName += " ";
+
+        std::vector<std::string> ArgTypeNames;
+        for (const Type *ArgTypePtr : Int.argTypes())
+          ArgTypeNames.push_back(ArgTypePtr->cName());
+        std::string ArgTypesString =
+            join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
+
+        // Emit the actual declaration. All these functions are
+        // declared 'static inline' without a body, which is fine
+        // provided clang recognizes them as builtins, and has the
+        // effect that this type signature is used in place of the one
+        // that Builtins.def didn't provide. That's how we can get
+        // structure types that weren't defined until this header was
+        // included to be part of the type signature of a builtin that
+        // was known to clang already.
+        //
+        // The declarations use __attribute__(__clang_arm_builtin_alias),
+        // so that each function declared will be recognized as the
+        // appropriate MVE builtin in spite of its user-facing name.
+        //
+        // (That's better than making them all wrapper functions,
+        // partly because it avoids any compiler error message citing
+        // the wrapper function definition instead of the user's code,
+        // and mostly because some MVE intrinsics have arguments
+        // required to be compile-time constants, and that property
+        // can't be propagated through a wrapper function. It can be
+        // propagated through a macro, but macros can't be overloaded
+        // on argument types very easily - you have to use _Generic,
+        // which makes error messages very confusing when the user
+        // gets it wrong.)
+        //
+        // Finally, the polymorphic versions of the intrinsics are
+        // also defined with __attribute__(overloadable), so that when
+        // the same name is defined with several type signatures, the
+        // right thing happens. Each one of the overloaded
+        // declarations is given a different builtin id, which
+        // has exactly the effect we want: first clang resolves the
+        // overload to the right function, then it knows which builtin
+        // it's referring to, and then the Sema checking for that
+        // builtin can check further things like the constant
+        // arguments.
+        //
+        // One more subtlety is the newline just before the return
+        // type name. That's a cosmetic tweak to make the error
+        // messages legible if the user gets the types wrong in a call
+        // to a polymorphic function: this way, clang will print just
+        // the _final_ line of each declaration in the header, to show
+        // the type signatures that would have been legal. So all the
+        // confusing machinery with __attribute__ is left out of the
+        // error message, and the user sees something that's more or
+        // less self-documenting: "here's a list of actually readable
+        // type signatures for vfooq(), and here's why each one didn't
+        // match your call".
+
+        OS << "static __inline__ __attribute__(("
+           << (Polymorphic ? "overloadable, " : "")
+           << "__clang_arm_builtin_alias(__builtin_arm_mve_" << Int.fullName()
+           << ")))\n"
+           << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
+      }
+    }
+  }
+  for (auto &part : parts)
+    part << "\n";
+
+  // Now we've finished accumulating bits and pieces into the parts[] array.
+  // Put it all together to write the final output file.
+
+  OS << "/*===---- arm_mve.h - ARM MVE intrinsics "
+        "-----------------------------------===\n"
+     << LLVMLicenseHeader
+     << "#ifndef __ARM_MVE_H\n"
+        "#define __ARM_MVE_H\n"
+        "\n"
+        "#if !__ARM_FEATURE_MVE\n"
+        "#error \"MVE support not enabled\"\n"
+        "#endif\n"
+        "\n"
+        "#include <stdint.h>\n"
+        "\n"
+        "#ifdef __cplusplus\n"
+        "extern \"C\" {\n"
+        "#endif\n"
+        "\n";
+
+  for (size_t i = 0; i < NumParts; ++i) {
+    std::vector<std::string> conditions;
+    if (i & Float)
+      conditions.push_back("(__ARM_FEATURE_MVE & 2)");
+    if (i & UseUserNamespace)
+      conditions.push_back("(!defined __ARM_MVE_PRESERVE_USER_NAMESPACE)");
+
+    std::string condition =
+        join(std::begin(conditions), std::end(conditions), " && ");
+    if (!condition.empty())
+      OS << "#if " << condition << "\n\n";
+    OS << parts[i].str();
+    if (!condition.empty())
+      OS << "#endif /* " << condition << " */\n\n";
+  }
+
+  OS << "#ifdef __cplusplus\n"
+        "} /* extern \"C\" */\n"
+        "#endif\n"
+        "\n"
+        "#endif /* __ARM_MVE_H */\n";
+}
+
+void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
+  // One TARGET_HEADER_BUILTIN entry per intrinsic, under its full name.
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    OS << "TARGET_HEADER_BUILTIN(__builtin_arm_mve_" << Int.fullName()
+       << ", \"\", \"n\", \"arm_mve.h\", ALL_LANGUAGES, \"\")\n";
+  }
+
+  // One BUILTIN entry per distinct polymorphic name; insert().second is
+  // false for a name already emitted, so no separate find() is needed.
+  std::set<std::string> ShortNamesSeen;
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+    StringRef Name = Int.shortName();
+    if (!Int.polymorphic() ||
+        !ShortNamesSeen.insert(std::string(Name)).second)
+      continue;
+    OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt";
+    if (Int.nonEvaluating())
+      OS << "u"; // indicate that this builtin doesn't evaluate its args
+    OS << "\")\n";
+  }
+}
+
+void MveEmitter::EmitBuiltinSema(raw_ostream &OS) {
+  // Intrinsics sharing the same check text share one run of case labels.
+  std::map<std::string, std::set<std::string>> GroupedChecks;
+  GroupSemaChecks(GroupedChecks);
+  for (const auto &Group : GroupedChecks) {
+    for (const std::string &IntrinsicName : Group.second)
+      OS << "case ARM::BI__builtin_arm_mve_" << IntrinsicName << ":\n";
+    OS << "  return " << Group.first;
+  }
+}
+
+// -----------------------------------------------------------------------------
+// The class used for generating arm_cde.h and related Clang bits
+//
+
+class CdeEmitter : public EmitterBase {
+public:
+  CdeEmitter(RecordKeeper &Records) : EmitterBase(Records) {}
+  void EmitHeader(raw_ostream &OS) override;      // writes arm_cde.h
+  void EmitBuiltinDef(raw_ostream &OS) override;  // writes builtin .def entries
+  void EmitBuiltinSema(raw_ostream &OS) override; // writes Sema case labels
+};
+
+void CdeEmitter::EmitHeader(raw_ostream &OS) {
+  // Accumulate pieces of the header file that will be enabled under various
+  // different combinations of #ifdef. The index into parts[] is one of the
+  // following:
+  constexpr unsigned None = 0;     // emitted with no #if guard
+  constexpr unsigned MVE = 1;      // emitted under #if __ARM_FEATURE_MVE
+  constexpr unsigned MVEFloat = 2; // emitted under #if __ARM_FEATURE_MVE & 2
+
+  constexpr unsigned NumParts = 3;
+  raw_self_contained_string_ostream parts[NumParts];
+
+  // Write typedefs for all the required vector types, and a few scalar
+  // types that don't already have the name we want them to have.
+
+  parts[MVE] << "typedef uint16_t mve_pred16_t;\n";
+  parts[MVEFloat] << "typedef __fp16 float16_t;\n"
+                     "typedef float float32_t;\n";
+  for (const auto &kv : ScalarTypes) {
+    const ScalarType *ST = kv.second.get();
+    if (ST->hasNonstandardName())
+      continue;
+    raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
+    const VectorType *VT = getVectorType(ST);
+
+    OS << "typedef __attribute__((__neon_vector_type__(" << VT->lanes()
+       << "), __clang_arm_mve_strict_polymorphism)) " << ST->cName() << " "
+       << VT->cName() << ";\n";
+  }
+  parts[MVE] << "\n";
+  parts[MVEFloat] << "\n";
+
+  // Write declarations for all the intrinsics.
+
+  for (const auto &kv : ACLEIntrinsics) {
+    const ACLEIntrinsic &Int = *kv.second;
+
+    // We generate each intrinsic twice, under its full unambiguous
+    // name and its shorter polymorphic name (if the latter exists).
+    for (bool Polymorphic : {false, true}) {
+      if (Polymorphic && !Int.polymorphic())
+        continue;
+      if (!Polymorphic && Int.polymorphicOnly())
+        continue;
+
+      raw_ostream &OS =
+          parts[Int.requiresFloat() ? MVEFloat
+                                    : Int.requiresMVE() ? MVE : None];
+
+      // Make the name of the function in this declaration; always __arm_*.
+      std::string FunctionName =
+          "__arm_" + (Polymorphic ? Int.shortName() : Int.fullName());
+
+      // Make strings for the types involved in the function's
+      // prototype.
+      std::string RetTypeName = Int.returnType()->cName();
+      if (!StringRef(RetTypeName).endswith("*"))
+        RetTypeName += " ";
+
+      std::vector<std::string> ArgTypeNames;
+      for (const Type *ArgTypePtr : Int.argTypes())
+        ArgTypeNames.push_back(ArgTypePtr->cName());
+      std::string ArgTypesString =
+          join(std::begin(ArgTypeNames), std::end(ArgTypeNames), ", ");
+
+      // Emit the actual declaration. See MveEmitter::EmitHeader for detailed
+      // comments.
+      OS << "static __inline__ __attribute__(("
+         << (Polymorphic ? "overloadable, " : "")
+         << "__clang_arm_builtin_alias(__builtin_arm_" << Int.builtinExtension()
+         << "_" << Int.fullName() << ")))\n"
+         << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
+    }
+  }
+
+  for (auto &part : parts)
+    part << "\n";
+
+  // Now we've finished accumulating bits and pieces into the parts[] array.
+  // Put it all together to write the final output file.
+
+  OS << "/*===---- arm_cde.h - ARM CDE intrinsics "
+        "-----------------------------------===\n"
+     << LLVMLicenseHeader
+     << "#ifndef __ARM_CDE_H\n"
+        "#define __ARM_CDE_H\n"
+        "\n"
+        "#if !__ARM_FEATURE_CDE\n"
+        "#error \"CDE support not enabled\"\n"
+        "#endif\n"
+        "\n"
+        "#include <stdint.h>\n"
+        "\n"
+        "#ifdef __cplusplus\n"
+        "extern \"C\" {\n"
+        "#endif\n"
+        "\n";
+
+  for (size_t i = 0; i < NumParts; ++i) {
+    std::string condition;
+    if (i == MVEFloat)
+      condition = "__ARM_FEATURE_MVE & 2";
+    else if (i == MVE)
+      condition = "__ARM_FEATURE_MVE";
+
+    if (!condition.empty())
+      OS << "#if " << condition << "\n\n";
+    OS << parts[i].str();
+    if (!condition.empty())
+      OS << "#endif /* " << condition << " */\n\n";
+  }
+
+  OS << "#ifdef __cplusplus\n"
+        "} /* extern \"C\" */\n"
+        "#endif\n"
+        "\n"
+        "#endif /* __ARM_CDE_H */\n";
+}
+
+void CdeEmitter::EmitBuiltinDef(raw_ostream &OS) {
+  for (const auto &Entry : ACLEIntrinsics) {
+    const ACLEIntrinsic &Intrin = *Entry.second;
+    // Header-only intrinsics get no builtin definition here.
+    if (!Intrin.headerOnly())
+      OS << "TARGET_HEADER_BUILTIN(__builtin_arm_cde_" << Intrin.fullName()
+         << ", \"\", \"ncU\", \"arm_cde.h\", ALL_LANGUAGES, \"\")\n";
+  }
+}
+
+void CdeEmitter::EmitBuiltinSema(raw_ostream &OS) {
+  // Each run of case labels is followed by `Err = <check>;` and `break;`.
+  std::map<std::string, std::set<std::string>> GroupedChecks;
+  GroupSemaChecks(GroupedChecks);
+  for (const auto &Group : GroupedChecks) {
+    for (const std::string &IntrinsicName : Group.second)
+      OS << "case ARM::BI__builtin_arm_cde_" << IntrinsicName << ":\n";
+    OS << "  Err = " << Group.first << "  break;\n";
+  }
+}
 
 } // namespace
 
 namespace clang {
 
+// MVE
+
 void EmitMveHeader(RecordKeeper &Records, raw_ostream &OS) {
   MveEmitter(Records).EmitHeader(OS);
 }
@@ -1949,4 +2139,26 @@
   MveEmitter(Records).EmitBuiltinAliases(OS);
 }
 
+// CDE
+
+void EmitCdeHeader(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitHeader(OS); // -gen-arm-cde-header: writes arm_cde.h
+}
+
+void EmitCdeBuiltinDef(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinDef(OS); // -gen-arm-cde-builtin-def
+}
+
+void EmitCdeBuiltinSema(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinSema(OS); // -gen-arm-cde-builtin-sema
+}
+
+void EmitCdeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinCG(OS); // -gen-arm-cde-builtin-codegen
+}
+
+void EmitCdeBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) {
+  CdeEmitter(Records).EmitBuiltinAliases(OS); // -gen-arm-cde-builtin-aliases
+}
+
 } // end namespace clang
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 6ba90ce..3d8f6dc 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -70,6 +70,11 @@
   GenArmMveBuiltinSema,
   GenArmMveBuiltinCG,
   GenArmMveBuiltinAliases,
+  GenArmCdeHeader,
+  GenArmCdeBuiltinDef,
+  GenArmCdeBuiltinSema,
+  GenArmCdeBuiltinCG,
+  GenArmCdeBuiltinAliases,
   GenAttrDocs,
   GenDiagDocs,
   GenOptDocs,
@@ -190,6 +195,16 @@
                    "Generate ARM MVE builtin code-generator for clang"),
         clEnumValN(GenArmMveBuiltinAliases, "gen-arm-mve-builtin-aliases",
                    "Generate list of valid ARM MVE builtin aliases for clang"),
+        clEnumValN(GenArmCdeHeader, "gen-arm-cde-header",
+                   "Generate arm_cde.h for clang"),
+        clEnumValN(GenArmCdeBuiltinDef, "gen-arm-cde-builtin-def",
+                   "Generate ARM CDE builtin definitions for clang"),
+        clEnumValN(GenArmCdeBuiltinSema, "gen-arm-cde-builtin-sema",
+                   "Generate ARM CDE builtin sema checks for clang"),
+        clEnumValN(GenArmCdeBuiltinCG, "gen-arm-cde-builtin-codegen",
+                   "Generate ARM CDE builtin code-generator for clang"),
+        clEnumValN(GenArmCdeBuiltinAliases, "gen-arm-cde-builtin-aliases",
+                   "Generate list of valid ARM CDE builtin aliases for clang"),
         clEnumValN(GenAttrDocs, "gen-attr-docs",
                    "Generate attribute documentation"),
         clEnumValN(GenDiagDocs, "gen-diag-docs",
@@ -351,6 +366,21 @@
   case GenArmMveBuiltinAliases:
     EmitMveBuiltinAliases(Records, OS);
     break;
+  case GenArmCdeHeader:
+    EmitCdeHeader(Records, OS);
+    break;
+  case GenArmCdeBuiltinDef:
+    EmitCdeBuiltinDef(Records, OS);
+    break;
+  case GenArmCdeBuiltinSema:
+    EmitCdeBuiltinSema(Records, OS);
+    break;
+  case GenArmCdeBuiltinCG:
+    EmitCdeBuiltinCG(Records, OS);
+    break;
+  case GenArmCdeBuiltinAliases:
+    EmitCdeBuiltinAliases(Records, OS);
+    break;
   case GenAttrDocs:
     EmitClangAttrDocs(Records, OS);
     break;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 7ac2e0e..cc30031 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -97,6 +97,12 @@
 void EmitMveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitMveBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitCdeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitCdeBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+
 void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);