[LLVM] Add validation to check the number of intrinsic args (#196563)

Add validation to check the number of intrinsic args. In service of
that, extend `getIntrinsicInfoTableEntries` to return several things:
* An ArrayRef to the table (as a convenience).
* The number of arguments.
* Whether the intrinsic has variable arguments.

This avoids some code duplication at various callers of
`getIntrinsicInfoTableEntries`. So that `getIntrinsicInfoTableEntries`
can correctly compute the number of arguments from the trip count of its
decode loop (one `DecodeIITType` call per argument), move parsing of the
element type for `IIT_SAME_VEC_WIDTH_ARG` into a recursive call made from
`DecodeIITType`.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index fe93283..868bca40 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -955,21 +955,13 @@
   using namespace llvm::Intrinsic;
 
   SmallVector<IITDescriptor, 8> table;
-  getIntrinsicInfoTableEntries(id, table);
+  auto [tableRef, _, isVarArg] = getIntrinsicInfoTableEntries(id, table);
 
-  ArrayRef<IITDescriptor> tableRef = table;
   mlir::Type resultTy = decodeFixedType(cgf, tableRef, context);
 
   SmallVector<mlir::Type, 8> argTypes;
-  bool isVarArg = false;
-  while (!tableRef.empty()) {
-    IITDescriptor::IITDescriptorKind kind = tableRef.front().Kind;
-    if (kind == IITDescriptor::VarArg) {
-      isVarArg = true;
-      break; // VarArg is last
-    }
+  while (!tableRef.empty())
     argTypes.push_back(decodeFixedType(cgf, tableRef, context));
-  }
 
   // CIR convention: no explicit void return type
   if (isa<cir::VoidType>(resultTy))
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index ae931c7..af97285 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -20,6 +20,7 @@
 #include "llvm/Support/TypeSize.h"
 #include <optional>
 #include <string>
+#include <tuple>
 
 namespace llvm {
 
@@ -259,10 +260,16 @@
 /// Returns true if \p id has a struct return type.
 LLVM_ABI bool hasStructReturnType(ID id);
 
-/// Return the IIT table descriptor for the specified intrinsic into an array
-/// of IITDescriptors.
-LLVM_ABI void getIntrinsicInfoTableEntries(ID id,
-                                           SmallVectorImpl<IITDescriptor> &T);
+/// Fill the IIT table descriptor for the intrinsic \p id into an array
+/// of IITDescriptors. Returns a tuple of 3 values:
+///  - ArrayRef for the descriptor table (for convenience).
+///  - Number of arguments.
+///  - Whether it is a variable argument intrinsic.
+///
+/// Note that for VarArg intrinsics, the trailing `VarArg` descriptor is
+/// consumed: it is excluded from the returned ArrayRef and argument count.
+LLVM_ABI std::tuple<ArrayRef<IITDescriptor>, unsigned, bool>
+getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
 
 /// Returns true if \p FT is a valid function type for intrinsic \p ID. If
 /// `ID` is an overloaded intrinsic, the overload types are pushed into the
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index ff57d33..89aef32 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -38,10 +38,9 @@
 using namespace llvm;
 
 // Forward declaration of static functions.
-static bool isIntrinsicVarArg(ArrayRef<Intrinsic::IITDescriptor> &Infos,
-                              bool Consume);
 static bool isSignatureValid(FunctionType *FTy,
                              ArrayRef<Intrinsic::IITDescriptor> &Infos,
+                             unsigned NumArgs, bool IsVarArg,
                              SmallVectorImpl<Type *> &OverloadTys,
                              raw_ostream &OS);
 
@@ -398,6 +397,8 @@
     unsigned OverloadIndex = Infos[NextElt++];
     OutputTable.push_back(
         IITDescriptor::get(IITDescriptor::SameVecWidth, OverloadIndex));
+    // IIT_SAME_VEC_WIDTH_ARG entry is followed by the element type.
+    DecodeIITType(NextElt, Infos, OutputTable);
     return;
   }
   case IIT_VEC_OF_ANYPTRS_TO_ELT: {
@@ -451,8 +452,9 @@
 #define GET_INTRINSIC_GENERATOR_GLOBAL
 #include "llvm/IR/IntrinsicImpl.inc"
 
-void Intrinsic::getIntrinsicInfoTableEntries(
-    ID id, SmallVectorImpl<IITDescriptor> &T) {
+std::tuple<ArrayRef<Intrinsic::IITDescriptor>, unsigned, bool>
+Intrinsic::getIntrinsicInfoTableEntries(ID id,
+                                        SmallVectorImpl<IITDescriptor> &T) {
   // Note that `FixedEncodingTy` is defined in IntrinsicImpl.inc and can be
   // uint16_t or uint32_t based on the the value of `Use16BitFixedEncoding` in
   // IntrinsicEmitter.cpp.
@@ -497,8 +499,21 @@
 
   // Okay, decode the table into the output vector of IITDescriptors.
   DecodeIITType(NextElt, IITEntries, T);
-  while (IITEntries[NextElt] != IIT_Done)
+  unsigned NumArgs = 0;
+  while (IITEntries[NextElt] != IIT_Done) {
     DecodeIITType(NextElt, IITEntries, T);
+    ++NumArgs;
+  }
+
+  ArrayRef<IITDescriptor> TableRef = T;
+
+  bool IsVarArg = false;
+  if (TableRef.back().Kind == Intrinsic::IITDescriptor::VarArg) {
+    IsVarArg = true;
+    TableRef.consume_back();
+    --NumArgs;
+  }
+  return {TableRef, NumArgs, IsVarArg};
 }
 
 static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
@@ -511,8 +526,6 @@
   switch (D.Kind) {
   case IITDescriptor::Void:
     return Type::getVoidTy(Context);
-  case IITDescriptor::VarArg:
-    return Type::getVoidTy(Context);
   case IITDescriptor::MMX:
     return llvm::FixedVectorType::get(llvm::IntegerType::get(Context, 64), 1);
   case IITDescriptor::AMX:
@@ -589,6 +602,10 @@
     assert(VTy && "Expected overload type to be a Vector Type");
     return VectorType::getInteger(VTy);
   }
+  case IITDescriptor::VarArg:
+    // VarArg token should be consumed by `getIntrinsicInfoTableEntries`, so we
+    // should never see it here.
+    llvm_unreachable("IITDescriptor::VarArg not expected");
   }
   llvm_unreachable("unhandled");
 }
@@ -596,10 +613,7 @@
 FunctionType *Intrinsic::getType(LLVMContext &Context, ID id,
                                  ArrayRef<Type *> OverloadTys) {
   SmallVector<IITDescriptor, 8> Table;
-  getIntrinsicInfoTableEntries(id, Table);
-  ArrayRef<IITDescriptor> TableRef = Table;
-
-  bool IsVarArg = isIntrinsicVarArg(TableRef, /*Consume=*/true);
+  auto [TableRef, _, IsVarArg] = getIntrinsicInfoTableEntries(id, Table);
 
   Type *ResultTy = DecodeFixedType(TableRef, OverloadTys, Context);
 
@@ -777,16 +791,13 @@
 
   // Get the intrinsic signature metadata.
   SmallVector<Intrinsic::IITDescriptor, 8> Table;
-  getIntrinsicInfoTableEntries(id, Table);
-  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
-  bool IsVarArg = isIntrinsicVarArg(TableRef, /*Consume=*/false);
-
+  auto [TableRef, NumArgs, IsVarArg] = getIntrinsicInfoTableEntries(id, Table);
   FunctionType *FTy = FunctionType::get(RetTy, ArgTys, IsVarArg);
 
   // Automatically determine the overloaded types.
   SmallVector<Type *, 4> OverloadTys;
-  [[maybe_unused]] bool IsValid =
-      ::isSignatureValid(FTy, TableRef, OverloadTys, nulls());
+  [[maybe_unused]] bool IsValid = ::isSignatureValid(
+      FTy, TableRef, NumArgs, IsVarArg, OverloadTys, nulls());
   assert(IsValid && "intrinsic signature mismatch");
   return getOrInsertIntrinsicDeclarationImpl(M, id, OverloadTys, FTy);
 }
@@ -859,8 +870,6 @@
   switch (D.Kind) {
   case IITDescriptor::Void:
     return !Ty->isVoidTy();
-  case IITDescriptor::VarArg:
-    return true;
   case IITDescriptor::MMX: {
     FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty);
     return !VT || VT->getNumElements() != 1 ||
@@ -1050,44 +1059,40 @@
       return true;
     return ThisArgVecTy != VectorType::getInteger(ReferenceType);
   }
+  case IITDescriptor::VarArg:
+    // VarArg token should be consumed by `getIntrinsicInfoTableEntries`, so we
+    // should never see it here.
+    llvm_unreachable("IITDescriptor::VarArg not expected");
   }
   llvm_unreachable("unhandled");
 }
 
-/// Returns true if the intrinsic is a VarArg intrinsics. If \p Consume is true
-/// the IITDescriptor for the VarArg is consumed and removed from \p Infos, else
-/// it stays unchanged.
-static bool isIntrinsicVarArg(ArrayRef<Intrinsic::IITDescriptor> &Infos,
-                              bool Consume) {
-  if (!Infos.empty() && Infos.back().Kind == Intrinsic::IITDescriptor::VarArg) {
-    if (Consume)
-      Infos.consume_back();
-    return true;
-  }
-  return false;
-}
-
 /// Return true if the function type \p FTy is a valid type signature for the
-/// type constraints specified in the .td file, represented by \p Infos.
-/// The overloaded type for the intrinsic are pushed to the OverloadTys vector.
+/// type constraints specified in the .td file, represented by \p Infos,
+/// \p NumArgs and \p IsVarArg. The overloaded types for the intrinsic are
+/// pushed to the \p OverloadTys vector.
 ///
 /// If the type is not valid, returns false and prints an error message to
 /// \p OS.
 static bool isSignatureValid(FunctionType *FTy,
                              ArrayRef<Intrinsic::IITDescriptor> &Infos,
+                             unsigned NumArgs, bool IsVarArg,
                              SmallVectorImpl<Type *> &OverloadTys,
                              raw_ostream &OS) {
-  bool IsVarArg = isIntrinsicVarArg(Infos, /*Consume=*/true);
-
   SmallVector<DeferredIntrinsicMatchPair, 2> DeferredChecks;
   if (matchIntrinsicType(FTy->getReturnType(), Infos, OverloadTys,
                          DeferredChecks, false)) {
     OS << "intrinsic has incorrect return type!";
     return false;
   }
-
   unsigned NumDeferredReturnChecks = DeferredChecks.size();
 
+  if (FTy->getNumParams() != NumArgs) {
+    OS << "intrinsic has incorrect number of args. Expected " << NumArgs
+       << ", but got " << FTy->getNumParams();
+    return false;
+  }
+
   for (Type *Ty : FTy->params()) {
     if (matchIntrinsicType(Ty, Infos, OverloadTys, DeferredChecks, false)) {
       OS << "intrinsic has incorrect argument type!";
@@ -1137,10 +1142,9 @@
     return false;
 
   SmallVector<Intrinsic::IITDescriptor, 8> Table;
-  getIntrinsicInfoTableEntries(ID, Table);
-  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+  auto [TableRef, NumArgs, IsVarArg] = getIntrinsicInfoTableEntries(ID, Table);
 
-  return ::isSignatureValid(FT, TableRef, OverloadTys, OS);
+  return ::isSignatureValid(FT, TableRef, NumArgs, IsVarArg, OverloadTys, OS);
 }
 
 bool Intrinsic::isSignatureValid(Function *F,
diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid4.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid4.ll
new file mode 100644
index 0000000..1a4e378
--- /dev/null
+++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid4.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Calling an intrinsic with the wrong number of arguments should be rejected.
+
+; CHECK: intrinsic has incorrect number of args. Expected 1, but got 2
+define void @test(float %a) {
+  call float @llvm.ceil.f32(float %a, float %a)
+  ret void
+}