[HLSL] Implement default constant buffer `$Globals` (#125807)

All variable declarations in the global scope that are not resources,
static or empty are implicitly added to implicit constant buffer
`$Globals`. They are created in `hlsl_constant` address space and
collected in an implicit `HLSLBufferDecl` node that is added to the AST
at the end of the translation unit. Codegen is the same as for explicit
constant buffers.

Fixes #123801
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 0f96bf0..86e6d14 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -5045,6 +5045,11 @@
   // LayoutStruct - Layout struct for the buffer
   CXXRecordDecl *LayoutStruct;
 
+  // For default (implicit) constant buffer, a lisf of references of global
+  // decls that belong to the buffer. The decls are already parented by the
+  // translation unit context.
+  SmallVector<Decl *> DefaultBufferDecls;
+
   HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation KwLoc,
                  IdentifierInfo *ID, SourceLocation IDLoc,
                  SourceLocation LBrace);
@@ -5054,6 +5059,8 @@
                                 bool CBuffer, SourceLocation KwLoc,
                                 IdentifierInfo *ID, SourceLocation IDLoc,
                                 SourceLocation LBrace);
+  static HLSLBufferDecl *CreateDefaultCBuffer(ASTContext &C,
+                                              DeclContext *LexicalParent);
   static HLSLBufferDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID);
 
   SourceRange getSourceRange() const override LLVM_READONLY {
@@ -5068,6 +5075,7 @@
   bool hasValidPackoffset() const { return HasValidPackoffset; }
   const CXXRecordDecl *getLayoutStruct() const { return LayoutStruct; }
   void addLayoutStruct(CXXRecordDecl *LS);
+  void addDefaultBufferDecl(Decl *D);
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
@@ -5079,6 +5087,28 @@
     return static_cast<HLSLBufferDecl *>(const_cast<DeclContext *>(DC));
   }
 
+  // Iterator for the buffer decls. For constant buffers explicitly declared
+  // with `cbuffer` keyword this will the list of decls parented by this
+  // HLSLBufferDecl (equal to `decls()`).
+  // For implicit $Globals buffer this will be the list of default buffer
+  // declarations stored in DefaultBufferDecls plus the implicit layout
+  // struct (the only child of HLSLBufferDecl in this case).
+  //
+  // The iterator uses llvm::concat_iterator to concatenate the lists
+  // `decls()` and `DefaultBufferDecls`. For non-default buffers
+  // `DefaultBufferDecls` is always empty.
+  using buffer_decl_iterator =
+      llvm::concat_iterator<Decl *const, SmallVector<Decl *>::const_iterator,
+                            decl_iterator>;
+  using buffer_decl_range = llvm::iterator_range<buffer_decl_iterator>;
+
+  buffer_decl_range buffer_decls() const {
+    return buffer_decl_range(buffer_decls_begin(), buffer_decls_end());
+  }
+  buffer_decl_iterator buffer_decls_begin() const;
+  buffer_decl_iterator buffer_decls_end() const;
+  bool buffer_decls_empty();
+
   friend class ASTDeclReader;
   friend class ASTDeclWriter;
 };
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 4f4bbe9..c4006fd 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -105,13 +105,13 @@
                          HLSLParamModifierAttr::Spelling Spelling);
   void ActOnTopLevelFunction(FunctionDecl *FD);
   void ActOnVariableDeclarator(VarDecl *VD);
+  void ActOnEndOfTranslationUnit(TranslationUnitDecl *TU);
   void CheckEntryPoint(FunctionDecl *FD);
   void CheckSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param,
                                const HLSLAnnotationAttr *AnnotationAttr);
   void DiagnoseAttrStageMismatch(
       const Attr *A, llvm::Triple::EnvironmentType Stage,
       std::initializer_list<llvm::Triple::EnvironmentType> AllowedStages);
-  void DiagnoseAvailabilityViolations(TranslationUnitDecl *TU);
 
   QualType handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS,
                                        QualType LHSType, QualType RHSType,
@@ -168,11 +168,16 @@
   // List of all resource bindings
   ResourceBindings Bindings;
 
+  // default constant buffer $Globals
+  HLSLBufferDecl *DefaultCBuffer;
+
 private:
   void collectResourceBindingsOnVarDecl(VarDecl *D);
   void collectResourceBindingsOnUserRecordDecl(const VarDecl *VD,
                                                const RecordType *RT);
   void processExplicitBindingsOnDecl(VarDecl *D);
+
+  void diagnoseAvailabilityViolations(TranslationUnitDecl *TU);
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 5a3be16..9c0be5c 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -57,6 +57,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -5745,6 +5746,17 @@
   return Result;
 }
 
+HLSLBufferDecl *
+HLSLBufferDecl::CreateDefaultCBuffer(ASTContext &C,
+                                     DeclContext *LexicalParent) {
+  DeclContext *DC = LexicalParent;
+  IdentifierInfo *II = &C.Idents.get("$Globals", tok::TokenKind::identifier);
+  HLSLBufferDecl *Result = new (C, DC) HLSLBufferDecl(
+      DC, true, SourceLocation(), II, SourceLocation(), SourceLocation());
+  Result->setImplicit(true);
+  return Result;
+}
+
 HLSLBufferDecl *HLSLBufferDecl::CreateDeserialized(ASTContext &C,
                                                    GlobalDeclID ID) {
   return new (C, ID) HLSLBufferDecl(nullptr, false, SourceLocation(), nullptr,
@@ -5757,6 +5769,30 @@
   addDecl(LS);
 }
 
+void HLSLBufferDecl::addDefaultBufferDecl(Decl *D) {
+  assert(isImplicit() &&
+         "default decls can only be added to the implicit/default constant "
+         "buffer $Globals");
+  DefaultBufferDecls.push_back(D);
+}
+
+HLSLBufferDecl::buffer_decl_iterator
+HLSLBufferDecl::buffer_decls_begin() const {
+  return buffer_decl_iterator(llvm::iterator_range(DefaultBufferDecls.begin(),
+                                                   DefaultBufferDecls.end()),
+                              decl_range(decls_begin(), decls_end()));
+}
+
+HLSLBufferDecl::buffer_decl_iterator HLSLBufferDecl::buffer_decls_end() const {
+  return buffer_decl_iterator(
+      llvm::iterator_range(DefaultBufferDecls.end(), DefaultBufferDecls.end()),
+      decl_range(decls_end(), decls_end()));
+}
+
+bool HLSLBufferDecl::buffer_decls_empty() {
+  return DefaultBufferDecls.empty() && decls_empty();
+}
+
 //===----------------------------------------------------------------------===//
 // ImportDecl Implementation
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 547220f..ed6d203 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -116,7 +116,7 @@
   BufGlobals.push_back(ValueAsMetadata::get(BufGV));
 
   const auto *ElemIt = LayoutStruct->element_begin();
-  for (Decl *D : BufDecl->decls()) {
+  for (Decl *D : BufDecl->buffer_decls()) {
     if (isa<CXXRecordDecl, EmptyDecl>(D))
       // Nothing to do for this declaration.
       continue;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 7924c32..1b7d0ac 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5513,6 +5513,11 @@
   if (getLangOpts().OpenCL && ASTTy->isSamplerT())
     return;
 
+  // HLSL default buffer constants will be emitted during HLSLBufferDecl codegen
+  if (getLangOpts().HLSL &&
+      D->getType().getAddressSpace() == LangAS::hlsl_constant)
+    return;
+
   // If this is OpenMP device, check if it is legal to emit this global
   // normally.
   if (LangOpts.OpenMPIsTargetDevice && OpenMPRuntime &&
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 145cda6..c699e92 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1417,8 +1417,7 @@
   }
 
   if (LangOpts.HLSL)
-    HLSL().DiagnoseAvailabilityViolations(
-        getASTContext().getTranslationUnitDecl());
+    HLSL().ActOnEndOfTranslationUnit(getASTContext().getTranslationUnitDecl());
 
   // If there were errors, disable 'unused' warnings since they will mostly be
   // noise. Don't warn for a use from a module: either we should warn on all
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index d26d85d..b298423 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Sema/SemaHLSL.h"
+#include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Attrs.inc"
@@ -147,7 +148,7 @@
   return DeclToBindingListIndex.contains(VD);
 }
 
-SemaHLSL::SemaHLSL(Sema &S) : SemaBase(S) {}
+SemaHLSL::SemaHLSL(Sema &S) : SemaBase(S), DefaultCBuffer(nullptr) {}
 
 Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
                                  SourceLocation KwLoc, IdentifierInfo *Ident,
@@ -225,7 +226,7 @@
   // or on none.
   bool HasPackOffset = false;
   bool HasNonPackOffset = false;
-  for (auto *Field : BufDecl->decls()) {
+  for (auto *Field : BufDecl->buffer_decls()) {
     VarDecl *Var = dyn_cast<VarDecl>(Field);
     if (!Var)
       continue;
@@ -492,7 +493,7 @@
   LS->setImplicit(true);
   LS->startDefinition();
 
-  for (Decl *D : BufDecl->decls()) {
+  for (Decl *D : BufDecl->buffer_decls()) {
     VarDecl *VD = dyn_cast<VarDecl>(D);
     if (!VD || VD->getStorageClass() == SC_Static ||
         VD->getType().getAddressSpace() == LangAS::hlsl_groupshared)
@@ -1928,7 +1929,19 @@
 
 } // namespace
 
-void SemaHLSL::DiagnoseAvailabilityViolations(TranslationUnitDecl *TU) {
+void SemaHLSL::ActOnEndOfTranslationUnit(TranslationUnitDecl *TU) {
+  // process default CBuffer - create buffer layout struct and invoke codegenCGH
+  if (DefaultCBuffer) {
+    SemaRef.getCurLexicalContext()->addDecl(DefaultCBuffer);
+    createHostLayoutStructForBuffer(SemaRef, DefaultCBuffer);
+
+    DeclGroupRef DG(DefaultCBuffer);
+    SemaRef.Consumer.HandleTopLevelDecl(DG);
+  }
+  diagnoseAvailabilityViolations(TU);
+}
+
+void SemaHLSL::diagnoseAvailabilityViolations(TranslationUnitDecl *TU) {
   // Skip running the diagnostics scan if the diagnostic mode is
   // strict (-fhlsl-strict-availability) and the target shader stage is known
   // because all relevant diagnostics were already emitted in the
@@ -2991,6 +3004,14 @@
   return Ty;
 }
 
+static bool IsDefaultBufferConstantDecl(VarDecl *VD) {
+  QualType QT = VD->getType();
+  return VD->getDeclContext()->isTranslationUnit() &&
+         QT.getAddressSpace() == LangAS::Default &&
+         VD->getStorageClass() != SC_Static &&
+         !isInvalidConstantBufferLeafElementType(QT.getTypePtr());
+}
+
 void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) {
   if (VD->hasGlobalStorage()) {
     // make sure the declaration has a complete type
@@ -3002,7 +3023,21 @@
       return;
     }
 
-    // find all resources on decl
+    // Global variables outside a cbuffer block that are not a resource, static,
+    // groupshared, or an empty array or struct belong to the default constant
+    // buffer $Globals
+    if (IsDefaultBufferConstantDecl(VD)) {
+      if (DefaultCBuffer == nullptr)
+        DefaultCBuffer = HLSLBufferDecl::CreateDefaultCBuffer(
+            SemaRef.getASTContext(), SemaRef.getCurLexicalContext());
+      // update address space to hlsl_constant
+      QualType NewTy = getASTContext().getAddrSpaceQualType(
+          VD->getType(), LangAS::hlsl_constant);
+      VD->setType(NewTy);
+      DefaultCBuffer->addDefaultBufferDecl(VD);
+    }
+
+    // find all resources bindings on decl
     if (VD->getType()->isHLSLIntangibleType())
       collectResourceBindingsOnVarDecl(VD);
 
diff --git a/clang/test/AST/HLSL/default_cbuffer.hlsl b/clang/test/AST/HLSL/default_cbuffer.hlsl
new file mode 100644
index 0000000..9e0fce7
--- /dev/null
+++ b/clang/test/AST/HLSL/default_cbuffer.hlsl
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -ast-dump -o - %s | FileCheck %s
+
+struct EmptyStruct {
+};
+
+struct S {
+  RWBuffer<float> buf;
+  EmptyStruct es;
+  float ea[0];
+  float b;
+};
+
+// CHECK: VarDecl {{.*}} used a 'hlsl_constant float'
+float a;
+
+// CHECK: VarDecl {{.*}} b 'RWBuffer<float>':'hlsl::RWBuffer<float>'
+RWBuffer<float> b; 
+
+// CHECK: VarDecl {{.*}} c 'EmptyStruct'
+EmptyStruct c;
+
+// CHECK: VarDecl {{.*}} d 'float[0]'
+float d[0];
+
+// CHECK: VarDecl {{.*}} e 'RWBuffer<float>[2]'
+RWBuffer<float> e[2];
+
+// CHECK: VarDecl {{.*}} f 'groupshared float'
+groupshared float f;
+
+// CHECK: VarDecl {{.*}} g 'hlsl_constant float'
+float g;
+
+// CHECK: VarDecl {{.*}} h 'hlsl_constant S'
+S h;
+
+// CHECK: HLSLBufferDecl {{.*}} implicit cbuffer $Globals
+// CHECK: CXXRecordDecl {{.*}} implicit struct __cblayout_$Globals definition
+// CHECK: PackedAttr
+// CHECK-NEXT: FieldDecl {{.*}} a 'float'
+// CHECK-NEXT: FieldDecl {{.*}} g 'float'
+// CHECK-NEXT: FieldDecl {{.*}} h '__cblayout_S'
+
+// CHECK: CXXRecordDecl {{.*}} implicit struct __cblayout_S definition
+// CHECK: PackedAttr {{.*}} Implicit
+// CHECK-NEXT: FieldDecl {{.*}} b 'float'
+
+export float foo() {
+  return a;
+}
diff --git a/clang/test/CodeGenHLSL/basic_types.hlsl b/clang/test/CodeGenHLSL/basic_types.hlsl
index d987af4..3620426 100644
--- a/clang/test/CodeGenHLSL/basic_types.hlsl
+++ b/clang/test/CodeGenHLSL/basic_types.hlsl
@@ -6,38 +6,38 @@
 // RUN:   -emit-llvm -disable-llvm-passes -o - -DNAMESPACED| FileCheck %s
 
 
-// CHECK: @uint16_t_Val = global i16 0, align 2
-// CHECK: @int16_t_Val = global i16 0, align 2
-// CHECK: @uint_Val = global i32 0, align 4
-// CHECK: @uint64_t_Val = global i64 0, align 8
-// CHECK: @int64_t_Val = global i64 0, align 8
-// CHECK: @int16_t2_Val = global <2 x i16> zeroinitializer, align 4
-// CHECK: @int16_t3_Val = global <3 x i16> zeroinitializer, align 8
-// CHECK: @int16_t4_Val = global <4 x i16> zeroinitializer, align 8
-// CHECK: @uint16_t2_Val = global <2 x i16> zeroinitializer, align 4
-// CHECK: @uint16_t3_Val = global <3 x i16> zeroinitializer, align 8
-// CHECK: @uint16_t4_Val = global <4 x i16> zeroinitializer, align 8
-// CHECK: @int2_Val = global <2 x i32> zeroinitializer, align 8
-// CHECK: @int3_Val = global <3 x i32> zeroinitializer, align 16
-// CHECK: @int4_Val = global <4 x i32> zeroinitializer, align 16
-// CHECK: @uint2_Val = global <2 x i32> zeroinitializer, align 8
-// CHECK: @uint3_Val = global <3 x i32> zeroinitializer, align 16
-// CHECK: @uint4_Val = global <4 x i32> zeroinitializer, align 16
-// CHECK: @int64_t2_Val = global <2 x i64> zeroinitializer, align 16
-// CHECK: @int64_t3_Val = global <3 x i64> zeroinitializer, align 32
-// CHECK: @int64_t4_Val = global <4 x i64> zeroinitializer, align 32
-// CHECK: @uint64_t2_Val = global <2 x i64> zeroinitializer, align 16
-// CHECK: @uint64_t3_Val = global <3 x i64> zeroinitializer, align 32
-// CHECK: @uint64_t4_Val = global <4 x i64> zeroinitializer, align 32
-// CHECK: @half2_Val = global <2 x half> zeroinitializer, align 4
-// CHECK: @half3_Val = global <3 x half> zeroinitializer, align 8
-// CHECK: @half4_Val = global <4 x half> zeroinitializer, align 8
-// CHECK: @float2_Val = global <2 x float> zeroinitializer, align 8
-// CHECK: @float3_Val = global <3 x float> zeroinitializer, align 16
-// CHECK: @float4_Val = global <4 x float> zeroinitializer, align 16
-// CHECK: @double2_Val = global <2 x double> zeroinitializer, align 16
-// CHECK: @double3_Val = global <3 x double> zeroinitializer, align 32
-// CHECK: @double4_Val = global <4 x double> zeroinitializer, align 32
+// CHECK: @uint16_t_Val = external addrspace(2) global i16, align 2
+// CHECK: @int16_t_Val = external addrspace(2) global i16, align 2
+// CHECK: @uint_Val = external addrspace(2) global i32, align 4
+// CHECK: @uint64_t_Val = external addrspace(2) global i64, align 8
+// CHECK: @int64_t_Val = external addrspace(2) global i64, align 8
+// CHECK: @int16_t2_Val = external addrspace(2) global <2 x i16>, align 4
+// CHECK: @int16_t3_Val = external addrspace(2) global <3 x i16>, align 8
+// CHECK: @int16_t4_Val = external addrspace(2) global <4 x i16>, align 8
+// CHECK: @uint16_t2_Val = external addrspace(2) global <2 x i16>, align 4
+// CHECK: @uint16_t3_Val = external addrspace(2) global <3 x i16>, align 8
+// CHECK: @uint16_t4_Val = external addrspace(2) global <4 x i16>, align 8
+// CHECK: @int2_Val = external addrspace(2) global <2 x i32>, align 8
+// CHECK: @int3_Val = external addrspace(2) global <3 x i32>, align 16
+// CHECK: @int4_Val = external addrspace(2) global <4 x i32>, align 16
+// CHECK: @uint2_Val = external addrspace(2) global <2 x i32>, align 8
+// CHECK: @uint3_Val = external addrspace(2) global <3 x i32>, align 16
+// CHECK: @uint4_Val = external addrspace(2) global <4 x i32>, align 16
+// CHECK: @int64_t2_Val = external addrspace(2) global <2 x i64>, align 16
+// CHECK: @int64_t3_Val = external addrspace(2) global <3 x i64>, align 32
+// CHECK: @int64_t4_Val = external addrspace(2) global <4 x i64>, align 32
+// CHECK: @uint64_t2_Val = external addrspace(2) global <2 x i64>, align 16
+// CHECK: @uint64_t3_Val = external addrspace(2) global <3 x i64>, align 32
+// CHECK: @uint64_t4_Val = external addrspace(2) global <4 x i64>, align 32
+// CHECK: @half2_Val = external addrspace(2) global <2 x half>, align 4
+// CHECK: @half3_Val = external addrspace(2) global <3 x half>, align 8
+// CHECK: @half4_Val = external addrspace(2) global <4 x half>, align 8
+// CHECK: @float2_Val = external addrspace(2) global <2 x float>, align 8
+// CHECK: @float3_Val = external addrspace(2) global <3 x float>, align 16
+// CHECK: @float4_Val = external addrspace(2) global <4 x float>, align 16
+// CHECK: @double2_Val = external addrspace(2) global <2 x double>, align 16
+// CHECK: @double3_Val = external addrspace(2) global <3 x double>, align 32
+// CHECK: @double4_Val = external addrspace(2) global <4 x double>, align 32
 
 #ifdef NAMESPACED
 #define TYPE_DECL(T)  hlsl::T T##_Val
diff --git a/clang/test/CodeGenHLSL/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/default_cbuffer.hlsl
new file mode 100644
index 0000000..c5176aa
--- /dev/null
+++ b/clang/test/CodeGenHLSL/default_cbuffer.hlsl
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute \
+// RUN:   -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }>
+// CHECK: %__cblayout_S = type <{ float }>
+
+// CHECK-DAG: @"$Globals.cb" = external constant target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16))
+// CHECK-DAG: @a = external addrspace(2) global float
+// CHECK-DAG: @g = external addrspace(2) global float
+// CHECK-DAG: @h = external addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4
+
+struct EmptyStruct {
+};
+
+struct S {
+  RWBuffer<float> buf;
+  EmptyStruct es;
+  float ea[0];
+  float b;
+};
+
+float a;
+RWBuffer<float> b; 
+EmptyStruct c;
+float d[0];
+RWBuffer<float> e[2];
+groupshared float f;
+float g;
+S h;
+
+RWBuffer<float> Buf;
+
+[numthreads(4,1,1)]
+void main() {
+  Buf[0] = a;
+}
+
+// CHECK: !hlsl.cbs = !{![[CB:.*]]}
+// CHECK: ![[CB]] = !{ptr @"$Globals.cb", ptr addrspace(2) @a, ptr addrspace(2) @g, ptr addrspace(2) @h}