diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 19ae887..5e04f32 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -3606,6 +3606,7 @@
     void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const;
     static llvm::Optional<MapTypeTy>
     isDeclareTargetDeclaration(const ValueDecl *VD);
+    static llvm::Optional<OMPDeclareTargetDeclAttr*> getActiveAttr(const ValueDecl *VD);
     static llvm::Optional<DevTypeTy> getDeviceType(const ValueDecl *VD);
     static llvm::Optional<SourceLocation> getLocation(const ValueDecl *VD);
   }];
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 274ed72..9a3f7ad 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1314,7 +1314,15 @@
 def note_omp_assumption_clause_continue_here
     : Note<"the ignored tokens spans until here">;
 def err_omp_declare_target_unexpected_clause: Error<
-  "unexpected '%0' clause, only %select{'to' or 'link'|'to', 'link' or 'device_type'}1 clauses expected">;
+  "unexpected '%0' clause, only %select{'device_type'|'to' or 'link'|'to', 'link' or 'device_type'}1 clauses expected">;
+def err_omp_begin_declare_target_unexpected_implicit_to_clause: Error<
+  "unexpected '(', only 'to', 'link' or 'device_type' clauses expected for 'begin declare target' directive">;
+def err_omp_declare_target_unexpected_clause_after_implicit_to: Error<
+  "unexpected clause after an implicit 'to' clause">;
+def err_omp_declare_target_missing_to_or_link_clause: Error<
+  "expected at least one 'to' or 'link' clause">;
+def err_omp_declare_target_multiple : Error<
+  "%0 appears multiple times in clauses on the same declare target directive">;
 def err_omp_expected_clause: Error<
   "expected at least one clause on '#pragma omp %0' directive">;
 def err_omp_mapper_illegal_identifier : Error<
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c3ada2c..99b4169 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10231,8 +10231,6 @@
   InGroup<OpenMPClauses>;
 def err_omp_invalid_target_decl : Error<
   "%0 used in declare target directive is not a variable or a function name">;
-def err_omp_declare_target_multiple : Error<
-  "%0 appears multiple times in clauses on the same declare target directive">;
 def err_omp_declare_target_to_and_link : Error<
   "%0 must not appear in both clauses 'to' and 'link'">;
 def warn_omp_not_in_target_context : Warning<
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 6f6d469..b5d6212e 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3181,10 +3181,12 @@
   /// Parse 'omp end assumes' directive.
   void ParseOpenMPEndAssumesDirective(SourceLocation Loc);
 
-  /// Parse clauses for '#pragma omp declare target'.
-  DeclGroupPtrTy ParseOMPDeclareTargetClauses();
+  /// Parse clauses for '#pragma omp [begin] declare target'.
+  void ParseOMPDeclareTargetClauses(Sema::DeclareTargetContextInfo &DTCI);
+
   /// Parse '#pragma omp end declare target'.
-  void ParseOMPEndDeclareTargetDirective(OpenMPDirectiveKind DKind,
+  void ParseOMPEndDeclareTargetDirective(OpenMPDirectiveKind BeginDKind,
+                                         OpenMPDirectiveKind EndDKind,
                                          SourceLocation Loc);
 
   /// Skip tokens until a `annot_pragma_openmp_end` was found. Emit a warning if
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 95d5868..d2a0745 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10209,8 +10209,31 @@
   //
 private:
   void *VarDataSharingAttributesStack;
+
+  struct DeclareTargetContextInfo {
+    struct MapInfo {
+      OMPDeclareTargetDeclAttr::MapTypeTy MT;
+      SourceLocation Loc;
+    };
+    /// Explicitly listed variables and functions in a 'to' or 'link' clause.
+    llvm::DenseMap<NamedDecl *, MapInfo> ExplicitlyMapped;
+
+    /// The 'device_type' as parsed from the clause.
+    OMPDeclareTargetDeclAttr::DevTypeTy DT = OMPDeclareTargetDeclAttr::DT_Any;
+
+    /// The directive kind, `begin declare target` or `declare target`.
+    OpenMPDirectiveKind Kind;
+
+    /// The directive location.
+    SourceLocation Loc;
+
+    DeclareTargetContextInfo(OpenMPDirectiveKind Kind, SourceLocation Loc)
+        : Kind(Kind), Loc(Loc) {}
+  };
+
   /// Number of nested '#pragma omp declare target' directives.
-  SmallVector<SourceLocation, 4> DeclareTargetNesting;
+  SmallVector<DeclareTargetContextInfo, 4> DeclareTargetNesting;
+
   /// Initialization of data-sharing attributes stack.
   void InitDataSharingAttributesStack();
   void DestroyDataSharingAttributesStack();
@@ -10476,19 +10499,28 @@
   const ValueDecl *getOpenMPDeclareMapperVarName() const;
 
   /// Called on the start of target region i.e. '#pragma omp declare target'.
-  bool ActOnStartOpenMPDeclareTargetDirective(SourceLocation Loc);
-  /// Called at the end of target region i.e. '#pragme omp end declare target'.
-  void ActOnFinishOpenMPDeclareTargetDirective();
+  bool ActOnStartOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI);
+
+  /// Called at the end of target region i.e. '#pragma omp end declare target'.
+  const DeclareTargetContextInfo ActOnOpenMPEndDeclareTargetDirective();
+
+  /// Called once a target context is completed, that can be when a
+  /// '#pragma omp end declare target' was encountered or when a
+  /// '#pragma omp declare target' without declaration-definition-seq was
+  /// encountered.
+  void ActOnFinishedOpenMPDeclareTargetContext(DeclareTargetContextInfo &DTCI);
+
   /// Searches for the provided declaration name for OpenMP declare target
   /// directive.
-  NamedDecl *
-  lookupOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec,
-                                const DeclarationNameInfo &Id,
-                                NamedDeclSetType &SameDirectiveDecls);
+  NamedDecl *lookupOpenMPDeclareTargetName(Scope *CurScope,
+                                           CXXScopeSpec &ScopeSpec,
+                                           const DeclarationNameInfo &Id);
+
   /// Called on correct id-expression from the '#pragma omp declare target'.
   void ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc,
                                     OMPDeclareTargetDeclAttr::MapTypeTy MT,
                                     OMPDeclareTargetDeclAttr::DevTypeTy DT);
+
   /// Check declaration inside target region.
   void
   checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D,
diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp
index 09fdca6..662f867 100644
--- a/clang/lib/AST/AttrImpl.cpp
+++ b/clang/lib/AST/AttrImpl.cpp
@@ -141,57 +141,44 @@
     OS << ' ' << ConvertMapTypeTyToStr(getMapType());
 }
 
-llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
-OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(const ValueDecl *VD) {
+llvm::Optional<OMPDeclareTargetDeclAttr *>
+OMPDeclareTargetDeclAttr::getActiveAttr(const ValueDecl *VD) {
   if (!VD->hasAttrs())
     return llvm::None;
   unsigned Level = 0;
-  const OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
-  for (const auto *Attr : VD->specific_attrs<OMPDeclareTargetDeclAttr>()) {
-    if (Level < Attr->getLevel()) {
+  OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
+  for (auto *Attr : VD->specific_attrs<OMPDeclareTargetDeclAttr>()) {
+    if (Level <= Attr->getLevel()) {
       Level = Attr->getLevel();
       FoundAttr = Attr;
     }
   }
   if (FoundAttr)
-    return FoundAttr->getMapType();
+    return FoundAttr;
+  return llvm::None;
+}
 
+llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
+OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(const ValueDecl *VD) {
+  llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD);
+  if (ActiveAttr.hasValue())
+    return ActiveAttr.getValue()->getMapType();
   return llvm::None;
 }
 
 llvm::Optional<OMPDeclareTargetDeclAttr::DevTypeTy>
 OMPDeclareTargetDeclAttr::getDeviceType(const ValueDecl *VD) {
-  if (!VD->hasAttrs())
-    return llvm::None;
-  unsigned Level = 0;
-  const OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
-  for (const auto *Attr : VD->specific_attrs<OMPDeclareTargetDeclAttr>()) {
-    if (Level < Attr->getLevel()) {
-      Level = Attr->getLevel();
-      FoundAttr = Attr;
-    }
-  }
-  if (FoundAttr)
-    return FoundAttr->getDevType();
-
+  llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD);
+  if (ActiveAttr.hasValue())
+    return ActiveAttr.getValue()->getDevType();
   return llvm::None;
 }
 
 llvm::Optional<SourceLocation>
 OMPDeclareTargetDeclAttr::getLocation(const ValueDecl *VD) {
-  if (!VD->hasAttrs())
-    return llvm::None;
-  unsigned Level = 0;
-  const OMPDeclareTargetDeclAttr *FoundAttr = nullptr;
-  for (const auto *Attr : VD->specific_attrs<OMPDeclareTargetDeclAttr>()) {
-    if (Level < Attr->getLevel()) {
-      Level = Attr->getLevel();
-      FoundAttr = Attr;
-    }
-  }
-  if (FoundAttr)
-    return FoundAttr->getRange().getBegin();
-
+  llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr = getActiveAttr(VD);
+  if (ActiveAttr.hasValue())
+    return ActiveAttr.getValue()->getRange().getBegin();
   return llvm::None;
 }
 
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 7b89f5d..10781db 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -2613,3 +2613,57 @@
 void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
   getOpenMPRuntime().processRequiresDirective(D);
 }
+
+void CodeGenModule::EmitOMPAllocateDecl(const OMPAllocateDecl *D) {
+  for (const Expr *E : D->varlists()) {
+    const auto *DE = cast<DeclRefExpr>(E);
+    const auto *VD = cast<VarDecl>(DE->getDecl());
+
+    // Skip all but globals.
+    if (!VD->hasGlobalStorage())
+      continue;
+
+    // Check if the global has been materialized yet or not. If not, we are done
+    // as any later generation will utilize the OMPAllocateDeclAttr. However, if
+    // we already emitted the global we might have done so before the
+    // OMPAllocateDeclAttr was attached, leading to the wrong address space
+    // (potentially). While not pretty, common practise is to remove the old IR
+    // global and generate a new one, so we do that here too. Uses are replaced
+    // properly.
+    StringRef MangledName = getMangledName(VD);
+    llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
+    if (!Entry)
+      continue;
+
+    // We can also keep the existing global if the address space is what we
+    // expect it to be, if not, it is replaced.
+    QualType ASTTy = VD->getType();
+    clang::LangAS GVAS = GetGlobalVarAddressSpace(VD);
+    auto TargetAS = getContext().getTargetAddressSpace(GVAS);
+    if (Entry->getType()->getAddressSpace() == TargetAS)
+      continue;
+
+    // Make a new global with the correct type / address space.
+    llvm::Type *Ty = getTypes().ConvertTypeForMem(ASTTy);
+    llvm::PointerType *PTy = llvm::PointerType::get(Ty, TargetAS);
+
+    // Replace all uses of the old global with a cast. Since we mutate the type
+    // in place we neeed an intermediate that takes the spot of the old entry
+    // until we can create the cast.
+    llvm::GlobalVariable *DummyGV = new llvm::GlobalVariable(
+        getModule(), Entry->getValueType(), false,
+        llvm::GlobalValue::CommonLinkage, nullptr, "dummy", nullptr,
+        llvm::GlobalVariable::NotThreadLocal, Entry->getAddressSpace());
+    Entry->replaceAllUsesWith(DummyGV);
+
+    Entry->mutateType(PTy);
+    llvm::Constant *NewPtrForOldDecl =
+        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+            Entry, DummyGV->getType());
+
+    // Now we have a casted version of the changed global, the dummy can be
+    // replaced and deleted.
+    DummyGV->replaceAllUsesWith(NewPtrForOldDecl);
+    DummyGV->eraseFromParent();
+  }
+}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 38da679..38341cb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -15,6 +15,7 @@
 #include "CGCleanup.h"
 #include "CGRecordLayout.h"
 #include "CodeGenFunction.h"
+#include "clang/AST/APValue.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/OpenMPClause.h"
@@ -2995,8 +2996,7 @@
   if (CGM.getLangOpts().OpenMPIsDevice) {
     // This could happen if the device compilation is invoked standalone.
     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
-      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
-                                      OffloadingEntriesNum);
+      return;
     auto &Entry =
         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
     Entry.setAddress(Addr);
@@ -3067,10 +3067,8 @@
   if (CGM.getLangOpts().OpenMPIsDevice) {
     // This could happen if the device compilation is invoked standalone.
     if (!hasDeviceGlobalVarEntryInfo(VarName))
-      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
+      return;
     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
-    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
-           "Resetting with the new address.");
     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
       if (Entry.getVarSize().isZero()) {
         Entry.setVarSize(VarSize);
@@ -3086,8 +3084,6 @@
       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
       assert(Entry.isValid() && Entry.getFlags() == Flags &&
              "Entry not initialized!");
-      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
-             "Resetting with the new address.");
       if (Entry.getVarSize().isZero()) {
         Entry.setVarSize(VarSize);
         Entry.setLinkage(Linkage);
@@ -10498,17 +10494,28 @@
     scanForTargetRegionsFunctions(II, ParentName);
 }
 
+static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
+  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+      OMPDeclareTargetDeclAttr::getDeviceType(VD);
+  if (!DevTy)
+    return false;
+  // Do not emit device_type(nohost) functions for the host.
+  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+    return true;
+  // Do not emit device_type(host) functions for the device.
+  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+    return true;
+  return false;
+}
+
 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
   // If emitting code for the host, we do not process FD here. Instead we do
   // the normal code generation.
   if (!CGM.getLangOpts().OpenMPIsDevice) {
-    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
-      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-          OMPDeclareTargetDeclAttr::getDeviceType(FD);
-      // Do not emit device_type(nohost) functions for the host.
-      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
+      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
+                                  CGM.getLangOpts().OpenMPIsDevice))
         return true;
-    }
     return false;
   }
 
@@ -10517,10 +10524,8 @@
   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
     StringRef Name = CGM.getMangledName(GD);
     scanForTargetRegionsFunctions(FD->getBody(), Name);
-    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-        OMPDeclareTargetDeclAttr::getDeviceType(FD);
-    // Do not emit device_type(nohost) functions for the host.
-    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
+                                CGM.getLangOpts().OpenMPIsDevice))
       return true;
   }
 
@@ -10530,6 +10535,10 @@
 }
 
 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
+  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
+                              CGM.getLangOpts().OpenMPIsDevice))
+    return true;
+
   if (!CGM.getLangOpts().OpenMPIsDevice)
     return false;
 
@@ -10602,6 +10611,13 @@
   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
       !CGM.getLangOpts().OpenMPIsDevice)
     return;
+
+  // If we have host/nohost variables, they do not need to be registered.
+  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+      OMPDeclareTargetDeclAttr::getDeviceType(VD);
+  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
+    return;
+
   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
   if (!Res) {
@@ -10632,6 +10648,10 @@
     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
     // Temp solution to prevent optimizations of the internal variables.
     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
+      // Do not create a "ref-variable" if the original is not also available
+      // on the host.
+      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
+        return;
       std::string RefName = getName({VarName, "ref"});
       if (!CGM.GetGlobalValue(RefName)) {
         llvm::Constant *AddrRef =
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ab133b5..c61da1c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2669,19 +2669,24 @@
 }
 
 bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
+  // In OpenMP 5.0 variables and function may be marked as
+  // device_type(host/nohost) and we should not emit them eagerly unless we sure
+  // that they must be emitted on the host/device. To be sure we need to have
+  // seen a declare target with an explicit mentioning of the function, we know
+  // we have if the level of the declare target attribute is -1. Note that we
+  // check somewhere else if we should emit this at all.
+  if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd) {
+    llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
+        OMPDeclareTargetDeclAttr::getActiveAttr(Global);
+    if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1)
+      return false;
+  }
+
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
     if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
       // Implicit template instantiations may change linkage if they are later
       // explicitly instantiated, so they should not be emitted eagerly.
       return false;
-    // In OpenMP 5.0 function may be marked as device_type(nohost) and we should
-    // not emit them eagerly unless we sure that the function must be emitted on
-    // the host.
-    if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd &&
-        !LangOpts.OpenMPIsDevice &&
-        !OMPDeclareTargetDeclAttr::getDeviceType(FD) &&
-        !FD->isUsed(/*CheckUsedAttr=*/false) && !FD->isReferenced())
-      return false;
   }
   if (const auto *VD = dyn_cast<VarDecl>(Global))
     if (Context.getInlineVariableDefinitionKind(VD) ==
@@ -4362,7 +4367,8 @@
 
     // Replace all uses of the old global with the new global
     llvm::Constant *NewPtrForOldDecl =
-        llvm::ConstantExpr::getBitCast(GV, Entry->getType());
+        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
+                                                             Entry->getType());
     Entry->replaceAllUsesWith(NewPtrForOldDecl);
 
     // Erase the old global, since it is no longer used.
@@ -5873,6 +5879,7 @@
     break;
 
   case Decl::OMPAllocate:
+    EmitOMPAllocateDecl(cast<OMPAllocateDecl>(D));
     break;
 
   case Decl::OMPDeclareReduction:
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 32d74c5..eb91d82 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1356,6 +1356,10 @@
   /// \param D Requires declaration
   void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
 
+  /// Emit a code for the allocate directive.
+  /// \param D The allocate declaration
+  void EmitOMPAllocateDecl(const OMPAllocateDecl *D);
+
   /// Returns whether the given record has hidden LTO visibility and therefore
   /// may participate in (single-module) CFI and whole-program vtable
   /// optimization.
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 05731fe..c8d62f7 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -131,6 +131,7 @@
       {OMPD_declare, OMPD_simd, OMPD_declare_simd},
       {OMPD_declare, OMPD_target, OMPD_declare_target},
       {OMPD_declare, OMPD_variant, OMPD_declare_variant},
+      {OMPD_begin_declare, OMPD_target, OMPD_begin_declare_target},
       {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant},
       {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant},
       {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel},
@@ -1664,30 +1665,41 @@
   return SimpleClauseData(Type, Loc, LOpen, TypeLoc, RLoc);
 }
 
-Parser::DeclGroupPtrTy Parser::ParseOMPDeclareTargetClauses() {
-  // OpenMP 4.5 syntax with list of entities.
-  Sema::NamedDeclSetType SameDirectiveDecls;
-  SmallVector<std::tuple<OMPDeclareTargetDeclAttr::MapTypeTy, SourceLocation,
-                         NamedDecl *>,
-              4>
-      DeclareTargetDecls;
-  OMPDeclareTargetDeclAttr::DevTypeTy DT = OMPDeclareTargetDeclAttr::DT_Any;
+void Parser::ParseOMPDeclareTargetClauses(
+    Sema::DeclareTargetContextInfo &DTCI) {
   SourceLocation DeviceTypeLoc;
+  bool RequiresToOrLinkClause = false;
+  bool HasToOrLinkClause = false;
   while (Tok.isNot(tok::annot_pragma_openmp_end)) {
     OMPDeclareTargetDeclAttr::MapTypeTy MT = OMPDeclareTargetDeclAttr::MT_To;
-    if (Tok.is(tok::identifier)) {
+    bool HasIdentifier = Tok.is(tok::identifier);
+    if (HasIdentifier) {
+      // If we see any clause we need a to or link clause.
+      RequiresToOrLinkClause = true;
       IdentifierInfo *II = Tok.getIdentifierInfo();
       StringRef ClauseName = II->getName();
       bool IsDeviceTypeClause =
           getLangOpts().OpenMP >= 50 &&
           getOpenMPClauseKind(ClauseName) == OMPC_device_type;
-      // Parse 'to|link|device_type' clauses.
-      if (!OMPDeclareTargetDeclAttr::ConvertStrToMapTypeTy(ClauseName, MT) &&
-          !IsDeviceTypeClause) {
+
+      bool IsToOrLinkClause =
+          OMPDeclareTargetDeclAttr::ConvertStrToMapTypeTy(ClauseName, MT);
+      assert((!IsDeviceTypeClause || !IsToOrLinkClause) && "Cannot be both!");
+
+      if (!IsDeviceTypeClause && DTCI.Kind == OMPD_begin_declare_target) {
         Diag(Tok, diag::err_omp_declare_target_unexpected_clause)
-            << ClauseName << (getLangOpts().OpenMP >= 50 ? 1 : 0);
+            << ClauseName << 0;
         break;
       }
+      if (!IsDeviceTypeClause && !IsToOrLinkClause) {
+        Diag(Tok, diag::err_omp_declare_target_unexpected_clause)
+            << ClauseName << (getLangOpts().OpenMP >= 50 ? 2 : 1);
+        break;
+      }
+
+      if (IsToOrLinkClause)
+        HasToOrLinkClause = true;
+
       // Parse 'device_type' clause and go to next clause if any.
       if (IsDeviceTypeClause) {
         Optional<SimpleClauseData> DevTypeData =
@@ -1697,16 +1709,17 @@
             // We already saw another device_type clause, diagnose it.
             Diag(DevTypeData.getValue().Loc,
                  diag::warn_omp_more_one_device_type_clause);
+            break;
           }
           switch (static_cast<OpenMPDeviceType>(DevTypeData.getValue().Type)) {
           case OMPC_DEVICE_TYPE_any:
-            DT = OMPDeclareTargetDeclAttr::DT_Any;
+            DTCI.DT = OMPDeclareTargetDeclAttr::DT_Any;
             break;
           case OMPC_DEVICE_TYPE_host:
-            DT = OMPDeclareTargetDeclAttr::DT_Host;
+            DTCI.DT = OMPDeclareTargetDeclAttr::DT_Host;
             break;
           case OMPC_DEVICE_TYPE_nohost:
-            DT = OMPDeclareTargetDeclAttr::DT_NoHost;
+            DTCI.DT = OMPDeclareTargetDeclAttr::DT_NoHost;
             break;
           case OMPC_DEVICE_TYPE_unknown:
             llvm_unreachable("Unexpected device_type");
@@ -1717,37 +1730,47 @@
       }
       ConsumeToken();
     }
-    auto &&Callback = [this, MT, &DeclareTargetDecls, &SameDirectiveDecls](
-                          CXXScopeSpec &SS, DeclarationNameInfo NameInfo) {
-      NamedDecl *ND = Actions.lookupOpenMPDeclareTargetName(
-          getCurScope(), SS, NameInfo, SameDirectiveDecls);
-      if (ND)
-        DeclareTargetDecls.emplace_back(MT, NameInfo.getLoc(), ND);
-    };
-    if (ParseOpenMPSimpleVarList(OMPD_declare_target, Callback,
-                                 /*AllowScopeSpecifier=*/true))
+
+    if (DTCI.Kind == OMPD_declare_target || HasIdentifier) {
+      auto &&Callback = [this, MT, &DTCI](CXXScopeSpec &SS,
+                                          DeclarationNameInfo NameInfo) {
+        NamedDecl *ND =
+            Actions.lookupOpenMPDeclareTargetName(getCurScope(), SS, NameInfo);
+        if (!ND)
+          return;
+        Sema::DeclareTargetContextInfo::MapInfo MI{MT, NameInfo.getLoc()};
+        bool FirstMapping = DTCI.ExplicitlyMapped.try_emplace(ND, MI).second;
+        if (!FirstMapping)
+          Diag(NameInfo.getLoc(), diag::err_omp_declare_target_multiple)
+              << NameInfo.getName();
+      };
+      if (ParseOpenMPSimpleVarList(OMPD_declare_target, Callback,
+                                   /*AllowScopeSpecifier=*/true))
+        break;
+    }
+
+    if (Tok.is(tok::l_paren)) {
+      Diag(Tok,
+           diag::err_omp_begin_declare_target_unexpected_implicit_to_clause);
       break;
+    }
+    if (!HasIdentifier && Tok.isNot(tok::annot_pragma_openmp_end)) {
+      Diag(Tok,
+           diag::err_omp_declare_target_unexpected_clause_after_implicit_to);
+      break;
+    }
 
     // Consume optional ','.
     if (Tok.is(tok::comma))
       ConsumeToken();
   }
+
+  // For declare target require at least 'to' or 'link' to be present.
+  if (DTCI.Kind == OMPD_declare_target && RequiresToOrLinkClause &&
+      !HasToOrLinkClause)
+    Diag(DTCI.Loc, diag::err_omp_declare_target_missing_to_or_link_clause);
+
   SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
-  ConsumeAnyToken();
-  for (auto &MTLocDecl : DeclareTargetDecls) {
-    OMPDeclareTargetDeclAttr::MapTypeTy MT;
-    SourceLocation Loc;
-    NamedDecl *ND;
-    std::tie(MT, Loc, ND) = MTLocDecl;
-    // device_type clause is applied only to functions.
-    Actions.ActOnOpenMPDeclareTargetName(
-        ND, Loc, MT, isa<VarDecl>(ND) ? OMPDeclareTargetDeclAttr::DT_Any : DT);
-  }
-  SmallVector<Decl *, 4> Decls(SameDirectiveDecls.begin(),
-                               SameDirectiveDecls.end());
-  if (Decls.empty())
-    return DeclGroupPtrTy();
-  return Actions.BuildDeclaratorGroup(Decls);
 }
 
 void Parser::skipUntilPragmaOpenMPEnd(OpenMPDirectiveKind DKind) {
@@ -1784,10 +1807,11 @@
     SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
 }
 
-void Parser::ParseOMPEndDeclareTargetDirective(OpenMPDirectiveKind DKind,
+void Parser::ParseOMPEndDeclareTargetDirective(OpenMPDirectiveKind BeginDKind,
+                                               OpenMPDirectiveKind EndDKind,
                                                SourceLocation DKLoc) {
-  parseOMPEndDirective(OMPD_declare_target, OMPD_end_declare_target, DKind,
-                       DKLoc, Tok.getLocation(),
+  parseOMPEndDirective(BeginDKind, OMPD_end_declare_target, EndDKind, DKLoc,
+                       Tok.getLocation(),
                        /* SkipUntilOpenMPEnd */ false);
   // Skip the last annot_pragma_openmp_end.
   if (Tok.is(tok::annot_pragma_openmp_end))
@@ -2101,53 +2125,41 @@
     ParseOMPDeclareVariantClauses(Ptr, Toks, Loc);
     return Ptr;
   }
+  case OMPD_begin_declare_target:
   case OMPD_declare_target: {
     SourceLocation DTLoc = ConsumeAnyToken();
-    if (Tok.isNot(tok::annot_pragma_openmp_end)) {
-      return ParseOMPDeclareTargetClauses();
-    }
+    bool HasClauses = Tok.isNot(tok::annot_pragma_openmp_end);
+    bool HasImplicitMappings =
+        DKind == OMPD_begin_declare_target || !HasClauses;
+    Sema::DeclareTargetContextInfo DTCI(DKind, DTLoc);
+    if (HasClauses)
+      ParseOMPDeclareTargetClauses(DTCI);
 
     // Skip the last annot_pragma_openmp_end.
     ConsumeAnyToken();
 
-    if (!Actions.ActOnStartOpenMPDeclareTargetDirective(DTLoc))
-      return DeclGroupPtrTy();
-
-    ParsingOpenMPDirectiveRAII NormalScope(*this, /*Value=*/false);
-    llvm::SmallVector<Decl *, 4> Decls;
-    while (Tok.isNot(tok::eof) && Tok.isNot(tok::r_brace)) {
-      if (Tok.isAnnotation() && Tok.is(tok::annot_pragma_openmp)) {
-        TentativeParsingAction TPA(*this);
-        ConsumeAnnotationToken();
-        DKind = parseOpenMPDirectiveKind(*this);
-        if (DKind != OMPD_end_declare_target)
-          TPA.Revert();
-        else
-          TPA.Commit();
-      }
-      if (DKind == OMPD_end_declare_target)
-        break;
-      DeclGroupPtrTy Ptr;
-      // Here we expect to see some function declaration.
-      if (AS == AS_none) {
-        assert(TagType == DeclSpec::TST_unspecified);
-        MaybeParseCXX11Attributes(Attrs);
-        ParsingDeclSpec PDS(*this);
-        Ptr = ParseExternalDeclaration(Attrs, &PDS);
-      } else {
-        Ptr =
-            ParseCXXClassMemberDeclarationWithPragmas(AS, Attrs, TagType, Tag);
-      }
-      if (Ptr) {
-        DeclGroupRef Ref = Ptr.get();
-        Decls.append(Ref.begin(), Ref.end());
-      }
+    if (HasImplicitMappings) {
+      Actions.ActOnStartOpenMPDeclareTargetContext(DTCI);
+      return nullptr;
     }
 
-    ParseOMPEndDeclareTargetDirective(DKind, DTLoc);
-    Actions.ActOnFinishOpenMPDeclareTargetDirective();
+    Actions.ActOnFinishedOpenMPDeclareTargetContext(DTCI);
+    llvm::SmallVector<Decl *, 4> Decls;
+    for (auto &It : DTCI.ExplicitlyMapped)
+      Decls.push_back(It.first);
     return Actions.BuildDeclaratorGroup(Decls);
   }
+  case OMPD_end_declare_target: {
+    if (!Actions.isInOpenMPDeclareTargetContext()) {
+      Diag(Tok, diag::err_omp_unexpected_directive)
+          << 1 << getOpenMPDirectiveName(DKind);
+      break;
+    }
+    const Sema::DeclareTargetContextInfo &DTCI =
+        Actions.ActOnOpenMPEndDeclareTargetDirective();
+    ParseOMPEndDeclareTargetDirective(DTCI.Kind, DKind, DTCI.Loc);
+    return nullptr;
+  }
   case OMPD_unknown:
     Diag(Tok, diag::err_omp_unknown_directive);
     break;
@@ -2191,7 +2203,6 @@
   case OMPD_parallel_master_taskloop:
   case OMPD_parallel_master_taskloop_simd:
   case OMPD_distribute:
-  case OMPD_end_declare_target:
   case OMPD_target_update:
   case OMPD_distribute_parallel_for:
   case OMPD_distribute_parallel_for_simd:
@@ -2570,6 +2581,7 @@
   }
   case OMPD_declare_simd:
   case OMPD_declare_target:
+  case OMPD_begin_declare_target:
   case OMPD_end_declare_target:
   case OMPD_requires:
   case OMPD_begin_declare_variant:
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index c1aa638..fdb2086 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -18457,7 +18457,7 @@
         OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
     // DevTy may be changed later by
     //  #pragma omp declare target to(*) device_type(*).
-    // Therefore DevTyhaving no value does not imply host. The emission status
+    // Therefore DevTy having no value does not imply host. The emission status
     // will be checked again at the end of compilation unit with Final = true.
     if (DevTy.hasValue())
       if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 068065b..d058039 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2481,8 +2481,8 @@
   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
       OMPDeclareTargetDeclAttr::getDeviceType(Caller->getMostRecentDecl());
   // Ignore host functions during device analyzis.
-  if (LangOpts.OpenMPIsDevice && DevTy &&
-      *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+  if (LangOpts.OpenMPIsDevice &&
+      (!DevTy || *DevTy == OMPDeclareTargetDeclAttr::DT_Host))
     return;
   // Ignore nohost functions during host analyzis.
   if (!LangOpts.OpenMPIsDevice && DevTy &&
@@ -19974,7 +19974,8 @@
       OMPDefaultmapClause(StartLoc, LParenLoc, MLoc, KindLoc, EndLoc, Kind, M);
 }
 
-bool Sema::ActOnStartOpenMPDeclareTargetDirective(SourceLocation Loc) {
+bool Sema::ActOnStartOpenMPDeclareTargetContext(
+    DeclareTargetContextInfo &DTCI) {
   DeclContext *CurLexicalContext = getCurLexicalContext();
   if (!CurLexicalContext->isFileContext() &&
       !CurLexicalContext->isExternCContext() &&
@@ -19983,23 +19984,30 @@
       !isa<ClassTemplateDecl>(CurLexicalContext) &&
       !isa<ClassTemplatePartialSpecializationDecl>(CurLexicalContext) &&
       !isa<ClassTemplateSpecializationDecl>(CurLexicalContext)) {
-    Diag(Loc, diag::err_omp_region_not_file_context);
+    Diag(DTCI.Loc, diag::err_omp_region_not_file_context);
     return false;
   }
-  DeclareTargetNesting.push_back(Loc);
+  DeclareTargetNesting.push_back(DTCI);
   return true;
 }
 
-void Sema::ActOnFinishOpenMPDeclareTargetDirective() {
+const Sema::DeclareTargetContextInfo
+Sema::ActOnOpenMPEndDeclareTargetDirective() {
   assert(!DeclareTargetNesting.empty() &&
-         "Unexpected ActOnFinishOpenMPDeclareTargetDirective");
-  DeclareTargetNesting.pop_back();
+         "check isInOpenMPDeclareTargetContext() first!");
+  return DeclareTargetNesting.pop_back_val();
 }
 
-NamedDecl *
-Sema::lookupOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec,
-                                    const DeclarationNameInfo &Id,
-                                    NamedDeclSetType &SameDirectiveDecls) {
+void Sema::ActOnFinishedOpenMPDeclareTargetContext(
+    DeclareTargetContextInfo &DTCI) {
+  for (auto &It : DTCI.ExplicitlyMapped)
+    ActOnOpenMPDeclareTargetName(It.first, It.second.Loc, It.second.MT,
+                                 DTCI.DT);
+}
+
+NamedDecl *Sema::lookupOpenMPDeclareTargetName(Scope *CurScope,
+                                               CXXScopeSpec &ScopeSpec,
+                                               const DeclarationNameInfo &Id) {
   LookupResult Lookup(*this, Id, LookupOrdinaryName);
   LookupParsedName(Lookup, CurScope, &ScopeSpec, true);
 
@@ -20028,8 +20036,6 @@
     Diag(Id.getLoc(), diag::err_omp_invalid_target_decl) << Id.getName();
     return nullptr;
   }
-  if (!SameDirectiveDecls.insert(cast<NamedDecl>(ND->getCanonicalDecl())))
-    Diag(Id.getLoc(), diag::err_omp_declare_target_multiple) << Id.getName();
   return ND;
 }
 
@@ -20046,32 +20052,35 @@
       (ND->isUsed(/*CheckUsedAttr=*/false) || ND->isReferenced()))
     Diag(Loc, diag::warn_omp_declare_target_after_first_use);
 
+  // Explicit declare target lists have precedence.
+  const unsigned Level = -1;
+
   auto *VD = cast<ValueDecl>(ND);
-  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-      OMPDeclareTargetDeclAttr::getDeviceType(VD);
-  Optional<SourceLocation> AttrLoc = OMPDeclareTargetDeclAttr::getLocation(VD);
-  if (DevTy.hasValue() && *DevTy != DT &&
-      (DeclareTargetNesting.empty() ||
-       *AttrLoc != DeclareTargetNesting.back())) {
+  llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
+      OMPDeclareTargetDeclAttr::getActiveAttr(VD);
+  if (ActiveAttr.hasValue() && ActiveAttr.getValue()->getDevType() != DT &&
+      ActiveAttr.getValue()->getLevel() == Level) {
     Diag(Loc, diag::err_omp_device_type_mismatch)
         << OMPDeclareTargetDeclAttr::ConvertDevTypeTyToStr(DT)
-        << OMPDeclareTargetDeclAttr::ConvertDevTypeTyToStr(*DevTy);
+        << OMPDeclareTargetDeclAttr::ConvertDevTypeTyToStr(
+               ActiveAttr.getValue()->getDevType());
     return;
   }
-  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
-      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
-  if (!Res || (!DeclareTargetNesting.empty() &&
-               *AttrLoc == DeclareTargetNesting.back())) {
-    auto *A = OMPDeclareTargetDeclAttr::CreateImplicit(
-        Context, MT, DT, DeclareTargetNesting.size() + 1,
-        SourceRange(Loc, Loc));
-    ND->addAttr(A);
-    if (ASTMutationListener *ML = Context.getASTMutationListener())
-      ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
-    checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
-  } else if (*Res != MT) {
+  if (ActiveAttr.hasValue() && ActiveAttr.getValue()->getMapType() != MT &&
+      ActiveAttr.getValue()->getLevel() == Level) {
     Diag(Loc, diag::err_omp_declare_target_to_and_link) << ND;
+    return;
   }
+
+  if (ActiveAttr.hasValue() && ActiveAttr.getValue()->getLevel() == Level)
+    return;
+
+  auto *A = OMPDeclareTargetDeclAttr::CreateImplicit(Context, MT, DT, Level,
+                                                     SourceRange(Loc, Loc));
+  ND->addAttr(A);
+  if (ASTMutationListener *ML = Context.getASTMutationListener())
+    ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
+  checkDeclIsAllowedInOpenMPTarget(nullptr, ND, Loc);
 }
 
 static void checkDeclInTargetContext(SourceLocation SL, SourceRange SR,
@@ -20085,8 +20094,6 @@
       (SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true) ||
        SemaRef.getCurBlock() || SemaRef.getCurCapturedRegion()) &&
       VD->hasGlobalStorage()) {
-    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapTy =
-        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
     if (!MapTy || *MapTy != OMPDeclareTargetDeclAttr::MT_To) {
       // OpenMP 5.0, 2.12.7 declare target Directive, Restrictions
       // If a lambda declaration and definition appears between a
@@ -20150,15 +20157,19 @@
     if ((E || !VD->getType()->isIncompleteType()) &&
         !checkValueDeclInTarget(SL, SR, *this, DSAStack, VD))
       return;
-    if (!E && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
+    if (!E && isInOpenMPDeclareTargetContext()) {
       // Checking declaration inside declare target region.
       if (isa<VarDecl>(D) || isa<FunctionDecl>(D) ||
           isa<FunctionTemplateDecl>(D)) {
+        llvm::Optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
+            OMPDeclareTargetDeclAttr::getActiveAttr(VD);
+        unsigned Level = DeclareTargetNesting.size();
+        if (ActiveAttr.hasValue() && ActiveAttr.getValue()->getLevel() >= Level)
+          return;
+        DeclareTargetContextInfo &DTCI = DeclareTargetNesting.back();
         auto *A = OMPDeclareTargetDeclAttr::CreateImplicit(
-            Context, OMPDeclareTargetDeclAttr::MT_To,
-            OMPDeclareTargetDeclAttr::DT_Any, DeclareTargetNesting.size(),
-            SourceRange(DeclareTargetNesting.back(),
-                        DeclareTargetNesting.back()));
+            Context, OMPDeclareTargetDeclAttr::MT_To, DTCI.DT, Level,
+            SourceRange(DTCI.Loc, DTCI.Loc));
         D->addAttr(A);
         if (ASTMutationListener *ML = Context.getASTMutationListener())
           ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 6e3e8bf..f7b476f 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -11,10 +11,14 @@
 #include <complex.h>
 #endif
 
-// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__nv_isnand(
-// CHECK-DAG: call i32 @__nv_isinfd(
-// CHECK-DAG: call double @__nv_copysign(
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call i32 @__nv_isnanf(
+// CHECK-DAG: call i32 @__nv_isinff(
+// CHECK-DAG: call i32 @__nv_finitef(
+// CHECK-DAG: call float @__nv_copysignf(
+// CHECK-DAG: call float @__nv_scalbnf(
+// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @__nv_logbf(
 
 // CHECK: define weak {{.*}} @__mulsc3
 // CHECK-DAG: call i32 @__nv_isnanf(
@@ -30,14 +34,10 @@
 // CHECK-DAG: call double @__nv_fabs(
 // CHECK-DAG: call double @__nv_logb(
 
-// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call i32 @__nv_isnanf(
-// CHECK-DAG: call i32 @__nv_isinff(
-// CHECK-DAG: call i32 @__nv_finitef(
-// CHECK-DAG: call float @__nv_copysignf(
-// CHECK-DAG: call float @__nv_scalbnf(
-// CHECK-DAG: call float @__nv_fabsf(
-// CHECK-DAG: call float @__nv_logbf(
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__nv_isnand(
+// CHECK-DAG: call i32 @__nv_isinfd(
+// CHECK-DAG: call double @__nv_copysign(
 
 void test_scmplx(float _Complex a) {
 #pragma omp target
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
index c9f94f4..12467cf 100644
--- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp
+++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -82,31 +82,38 @@
 // CHECK1-SAME: () #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[A1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
+// CHECK1-NEXT:    [[A2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+// CHECK1-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
+// CHECK1-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
+// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
 // CHECK1:       .spmd:
 // CHECK1-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK1:       .non-spmd:
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 128, i16 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty*
+// CHECK1-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 4, i64 128
+// CHECK1-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
+// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty*
 // CHECK1-NEXT:    br label [[DOTEXIT]]
 // CHECK1:       .exit:
-// CHECK1-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP3]], [[DOTNON_SPMD]] ]
+// CHECK1-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to %struct._globalized_locals_ty.0*
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[_SELECT_STACK]], i32 0, i32 0
 // CHECK1-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK1-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK1-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i32* [[A1]], i32* [[TMP4]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A]], i32 0, i32 [[NVPTX_LANE_ID]]
+// CHECK1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], i32* [[A1]], i32* [[TMP9]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], i32* [[A2]], i32* [[TMP10]]
+// CHECK1-NEXT:    [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[TMP11]]) #[[ATTR4]]
 // CHECK1-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[DOTEXIT3:%.*]], label [[DOTNON_SPMD2:%.*]]
-// CHECK1:       .non-spmd2:
-// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8*
-// CHECK1-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP6]])
-// CHECK1-NEXT:    br label [[DOTEXIT3]]
-// CHECK1:       .exit3:
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK1-NEXT:    ret i32 [[TMP7]]
+// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
+// CHECK1:       .non-spmd3:
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8*
+// CHECK1-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP12]])
+// CHECK1-NEXT:    br label [[DOTEXIT4]]
+// CHECK1:       .exit4:
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    ret i32 [[TMP13]]
 //
diff --git a/clang/test/OpenMP/declare_target_device_only_compilation.cpp b/clang/test/OpenMP/declare_target_device_only_compilation.cpp
deleted file mode 100644
index 7be635d..0000000
--- a/clang/test/OpenMP/declare_target_device_only_compilation.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
-
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-i386-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-i386-host.bc -o - | FileCheck %s
-
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=x86_64-unknown-linux-gnu -emit-llvm-bc %s -o %t-x86_64-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86_64-host.bc -o - | FileCheck %s
-
-// expected-no-diagnostics
-
-#pragma omp declare target
-#pragma omp begin declare variant match(device={kind(nohost)})
-int G1;
-#pragma omp end declare variant
-#pragma omp end declare target
-
-// CHECK: @[[G:.+]] = hidden {{.*}}global i32 0, align 4
-// CHECK: !omp_offload.info = !{!0}
-// CHECK: !0 = !{i32 1, !"[[G]]", i32 0, i32 0}
diff --git a/clang/test/OpenMP/declare_target_messages.cpp b/clang/test/OpenMP/declare_target_messages.cpp
index 17a60ce..b5ffe7b 100644
--- a/clang/test/OpenMP/declare_target_messages.cpp
+++ b/clang/test/OpenMP/declare_target_messages.cpp
@@ -17,13 +17,16 @@
 void f();
 #pragma omp end declare target shared(a) // expected-warning {{extra tokens at the end of '#pragma omp end declare target' are ignored}}
 
-#pragma omp declare target map(a) // omp45-error {{unexpected 'map' clause, only 'to' or 'link' clauses expected}} omp5-error {{unexpected 'map' clause, only 'to', 'link' or 'device_type' clauses expected}}
+#pragma omp declare target map(a) // expected-error {{expected at least one 'to' or 'link' clause}} omp45-error {{unexpected 'map' clause, only 'to' or 'link' clauses expected}} omp5-error {{unexpected 'map' clause, only 'to', 'link' or 'device_type' clauses expected}}
 
 #pragma omp declare target to(foo1) // expected-error {{use of undeclared identifier 'foo1'}}
 
 #pragma omp declare target link(foo2) // expected-error {{use of undeclared identifier 'foo2'}}
 
-#pragma omp declare target to(f) device_type(any) device_type(any) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-warning 2 {{more than one 'device_type' clause is specified}} omp5-error {{'device_type(host)' does not match previously specified 'device_type(any)' for the same declaration}}
+#pragma omp declare target to(f) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} dev5-note {{marked as 'device_type(host)' here}}
+
+void q();
+#pragma omp declare target to(q) device_type(any) device_type(any) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-warning {{more than one 'device_type' clause is specified}}
 
 void c();
 
@@ -118,7 +121,8 @@
   g = object.method();
   g += object.method1();
   g += object1.method() + p;
-  f();
+  f(); // dev5-error {{function with 'device_type(host)' is not available on device}}
+  q();
   c();
 }
 #pragma omp declare target
@@ -150,10 +154,10 @@
 }
 
 namespace {
-#pragma omp declare target // expected-note {{to match this '#pragma omp declare target'}}
+#pragma omp declare target
   int x;
-} //  expected-error {{expected '#pragma omp end declare target'}}
-#pragma omp end declare target // expected-error {{unexpected OpenMP directive '#pragma omp end declare target'}}
+}
+#pragma omp end declare target
 
 #pragma omp declare target link(S) // expected-error {{'S' used in declare target directive is not a variable or a function name}}
 
@@ -187,4 +191,10 @@
 void any7() {device();} // host5-error {{function with 'device_type(nohost)' is not available on host}}
 void any8() {any2();}
 
-#pragma omp declare target // expected-error {{expected '#pragma omp end declare target'}} expected-note {{to match this '#pragma omp declare target'}}
+int MultiDevTy;
+#pragma omp declare target to(MultiDevTy) device_type(any)    // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}}
+#pragma omp declare target to(MultiDevTy) device_type(host)   // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-error {{'device_type(host)' does not match previously specified 'device_type(any)' for the same declaration}}
+#pragma omp declare target to(MultiDevTy) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} omp5-error {{'device_type(nohost)' does not match previously specified 'device_type(any)' for the same declaration}}
+
+// TODO: Issue an error message error {{expected '#pragma omp end declare target'}} note {{to match this '#pragma omp declare target'}}
+#pragma omp declare target
diff --git a/clang/test/OpenMP/declare_target_only_one_side_compilation.cpp b/clang/test/OpenMP/declare_target_only_one_side_compilation.cpp
new file mode 100644
index 0000000..ca8ff8c
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_only_one_side_compilation.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-i386-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-i386-host.bc -o - | FileCheck %s --check-prefix DEVICE
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=x86_64-unknown-linux-gnu -emit-llvm-bc %s -o %t-x86_64-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86_64-host.bc -o - | FileCheck %s --check-prefix DEVICE
+
+// expected-no-diagnostics
+
+#pragma omp declare target
+#pragma omp begin declare variant match(device = {kind(nohost)})
+int G1;
+static int G2;
+#pragma omp end declare variant
+#pragma omp end declare target
+
+#pragma omp begin declare target device_type(nohost)
+int G3;
+static int G4;
+#pragma omp end declare target
+
+#pragma omp declare target
+int G5;
+static int G6;
+#pragma omp end declare target
+
+#pragma omp declare target to(G5, G6) device_type(nohost)
+
+#pragma omp begin declare target device_type(host)
+int G7;
+static int G8;
+#pragma omp end declare target
+
+#pragma omp declare target
+int G9;
+static int G10;
+#pragma omp end declare target
+
+int G11;
+static int G12;
+#pragma omp declare target to(G9, G10, G11, G12) device_type(host)
+
+// TODO: The code below should probably work but it is not 100% clear.
+#if 0
+#pragma omp declare target
+#pragma omp begin declare variant match(device = {kind(host)})
+int GX;
+static int GY;
+#pragma omp end declare variant
+#pragma omp end declare target
+#endif
+
+// TODO: It is odd, probably wrong, that we don't mangle all variables.
+
+// DEVICE-DAG: @G1 = hidden {{.*}}global i32 0, align 4
+// DEVICE-DAG: @_ZL2G2 = internal {{.*}}global i32 0, align 4
+// DEVICE-DAG: @G3 = hidden {{.*}}global i32 0, align 4
+// DEVICE-DAG: @_ZL2G4 = internal {{.*}}global i32 0, align 4
+// DEVICE-DAG: @G5 = hidden {{.*}}global i32 0, align 4
+// DEVICE-DAG: @_ZL2G6 = internal {{.*}}global i32 0, align 4
+// DEVICE-NOT: ref
+// DEVICE-NOT: llvm.used
+// DEVICE-NOT: omp_offload
+
+// HOST-DAG: @G7 = dso_local global i32 0, align 4
+// HOST-DAG: @_ZL2G8 = internal global i32 0, align 4
+// HOST-DAG: @G9 = dso_local global i32 0, align 4
+// HOST-DAG: @_ZL3G10 = internal global i32 0, align 4
+// HOST-DAG: @G11 = dso_local global i32 0, align 4
+// HOST-DAG: @_ZL3G12 = internal global i32 0, align 4
diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp
index 2c1630a..a677301 100644
--- a/clang/test/OpenMP/for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp
@@ -176,6 +176,28 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -214,6 +236,23 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -693,45 +732,6 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_for_firstprivate_codegen.cpp
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -769,6 +769,28 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -807,6 +829,23 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1286,45 +1325,6 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_for_firstprivate_codegen.cpp
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1362,6 +1362,28 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1400,6 +1422,23 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1511,45 +1550,6 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_for_firstprivate_codegen.cpp
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1587,6 +1587,28 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1625,6 +1647,23 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1801,45 +1840,6 @@
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
 // CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_for_firstprivate_codegen.cpp
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
diff --git a/clang/test/OpenMP/irbuilder_for_iterator.cpp b/clang/test/OpenMP/irbuilder_for_iterator.cpp
index a85e300..d15d891 100644
--- a/clang/test/OpenMP/irbuilder_for_iterator.cpp
+++ b/clang/test/OpenMP/irbuilder_for_iterator.cpp
@@ -28,7 +28,7 @@
 
 #endif // HEADER
 // CHECK-LABEL: define {{[^@]+}}@workshareloop_iterator
-// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] {
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -58,8 +58,8 @@
 // CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[DOTCOUNT]], 1
 // CHECK-NEXT:    store i64 [[TMP2]], i64* [[P_UPPERBOUND]], align 8
 // CHECK-NEXT:    store i64 1, i64* [[P_STRIDE]], align 8
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
-// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
 // CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP4]], [[TMP3]]
@@ -97,16 +97,16 @@
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
 // CHECK:       omp_loop.exit:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
 // CHECK:       omp_loop.after:
 // CHECK-NEXT:    ret void
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] {
+// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8
 // CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
@@ -145,7 +145,7 @@
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK-SAME: (%struct.MyIterator* nonnull align 1 dereferenceable(1) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] {
+// CHECK-SAME: (%struct.MyIterator* nonnull align 1 dereferenceable(1) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca %struct.MyIterator*, align 8
 // CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i64, align 8
@@ -159,7 +159,7 @@
 // CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8
 // CHECK-NEXT:    [[MUL:%.*]] = mul i64 1, [[TMP2]]
 // CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[MUL]] to i32
-// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
+// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
 // CHECK-NEXT:    [[TMP3:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    [[CALL:%.*]] = call nonnull align 1 dereferenceable(1) %struct.MyIterator* @_ZN10MyIteratoraSERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP3]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[REF_TMP]])
 // CHECK-NEXT:    ret void
diff --git a/clang/test/OpenMP/irbuilder_for_rangefor.cpp b/clang/test/OpenMP/irbuilder_for_rangefor.cpp
index 6924354..cc01b18 100644
--- a/clang/test/OpenMP/irbuilder_for_rangefor.cpp
+++ b/clang/test/OpenMP/irbuilder_for_rangefor.cpp
@@ -34,7 +34,7 @@
 
 #endif // HEADER
 // CHECK-LABEL: define {{[^@]+}}@workshareloop_rangefor
-// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] {
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -57,9 +57,9 @@
 // CHECK-NEXT:    call void @_ZN7MyRangeC1Ei(%struct.MyRange* nonnull dereferenceable(1) [[REF_TMP]], i32 42)
 // CHECK-NEXT:    store %struct.MyRange* [[REF_TMP]], %struct.MyRange** [[__RANGE2]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
-// CHECK-NEXT:    call void @_ZN7MyRange5beginEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__BEGIN2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP0]])
+// CHECK-NEXT:    call void @_ZN7MyRange5beginEv(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[__BEGIN2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP0]])
 // CHECK-NEXT:    [[TMP1:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
-// CHECK-NEXT:    call void @_ZN7MyRange3endEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__END2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP1]])
+// CHECK-NEXT:    call void @_ZN7MyRange3endEv(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[__END2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP1]])
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[__BEGIN2]])
 // CHECK-NEXT:    store i32 [[CALL]], i32* [[I]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
@@ -76,8 +76,8 @@
 // CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[DOTCOUNT]], 1
 // CHECK-NEXT:    store i64 [[TMP5]], i64* [[P_UPPERBOUND]], align 8
 // CHECK-NEXT:    store i64 1, i64* [[P_STRIDE]], align 8
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
-// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
 // CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8
 // CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]]
@@ -113,16 +113,16 @@
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
 // CHECK:       omp_loop.exit:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
 // CHECK:       omp_loop.after:
 // CHECK-NEXT:    ret void
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] {
+// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8
 // CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
@@ -163,7 +163,7 @@
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] {
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8
 // CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i64, align 8
@@ -177,7 +177,7 @@
 // CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8
 // CHECK-NEXT:    [[MUL:%.*]] = mul i64 1, [[TMP2]]
 // CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[MUL]] to i32
-// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
+// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret([[STRUCT_MYITERATOR]]) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]])
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[CALL]], i32* [[TMP3]], align 4
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c
index 031b329..581b350 100644
--- a/clang/test/OpenMP/irbuilder_for_unsigned.c
+++ b/clang/test/OpenMP/irbuilder_for_unsigned.c
@@ -14,7 +14,7 @@
 
 #endif // HEADER
 // CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned
-// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) [[ATTR0:#.*]] {
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -46,8 +46,8 @@
 // CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT]], 1
 // CHECK-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND]], align 4
 // CHECK-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
-// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
@@ -89,16 +89,16 @@
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
 // CHECK:       omp_loop.exit:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM9]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
 // CHECK:       omp_loop.after:
 // CHECK-NEXT:    ret void
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR1:#.*]] {
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8
 // CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
@@ -135,7 +135,7 @@
 //
 //
 // CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR1]] {
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8
 // CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i32, align 4
diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
index 552455e..158a04b 100644
--- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -12,10 +12,10 @@
 
 // ALL-LABEL: @_Z17nested_parallel_0v(
 // ALL-NEXT:  entry:
-// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
-// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
 // ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
 // ALL:       omp.par.outlined.exit12:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -39,10 +39,10 @@
 // ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
 // ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
 // ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
-// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
 // ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
 // ALL:       omp.par.outlined.exit13:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -67,10 +67,10 @@
 // ALL-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
 // ALL-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
 // ALL-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
-// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
 // ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
 // ALL:       omp.par.outlined.exit55:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
index 9a60302..7774236 100644
--- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c
+++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -11,10 +11,10 @@
 
 // CHECK-LABEL: @_Z14parallel_for_0v(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
-// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*))
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*))
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK:       omp.par.outlined.exit:
 // CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -23,15 +23,15 @@
 //
 // CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
 // CHECK-DEBUG-NEXT:  entry:
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]), [[DBG12:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG12:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), [[DBG13:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG13:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    ret void, [[DBG17:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG17:![0-9]+]]
 //
 void parallel_for_0(void) {
 #pragma omp parallel
@@ -50,10 +50,10 @@
 // CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
 // CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
-// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
 // CHECK:       omp.par.outlined.exit16:
 // CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -66,20 +66,20 @@
 // CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
 // CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], [[META72:metadata !.*]], metadata !DIExpression()), [[DBG73:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], [[META74:metadata !.*]], metadata !DIExpression()), [[DBG75:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], [[META76:metadata !.*]], metadata !DIExpression()), [[DBG77:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB6:@.*]]), [[DBG78:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG77:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), [[DBG79:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG78:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit16:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    ret void, [[DBG81:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG80:![0-9]+]]
 //
 void parallel_for_1(float *r, int a, double b) {
 #pragma omp parallel
@@ -110,10 +110,10 @@
 // CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
 // CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
-// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
 // CHECK:       omp.par.outlined.exit184:
 // CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -132,8 +132,8 @@
 // CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1
 // CHECK-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4
 // CHECK-NEXT:    store i32 1, i32* [[P_STRIDE206]], align 4
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1)
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1)
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
@@ -160,9 +160,9 @@
 // CHECK-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER191]]
 // CHECK:       omp_loop.exit195:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]])
-// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM208]])
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]]
 // CHECK:       omp_loop.after196:
 // CHECK-NEXT:    ret void
@@ -181,68 +181,68 @@
 // CHECK-DEBUG-NEXT:    [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    [[P_STRIDE206:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], [[META133:metadata !.*]], metadata !DIExpression()), [[DBG134:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], [[META135:metadata !.*]], metadata !DIExpression()), [[DBG136:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], [[META137:metadata !.*]], metadata !DIExpression()), [[DBG138:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB13:@.*]]), [[DBG139:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG138:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), [[DBG140:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG139:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit184:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I185]], [[META144:metadata !.*]], metadata !DIExpression()), [[DBG147:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    store i32 0, i32* [[I185]], align 4, [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, [[DBG148:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    store i32* [[I185]], i32** [[TMP0]], align 8, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, [[DBG149:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[TMP1]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[I185]], align 4, !dbg [[DBG146]]
+// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG147:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG148:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.preheader190:
-// CHECK-DEBUG-NEXT:    store i32 0, i32* [[P_LOWERBOUND204]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, [[DBG148]]
-// CHECK-DEBUG-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    store i32 1, i32* [[P_STRIDE206]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB42:@.*]]), [[DBG148]]
-// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* [[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, [[DBG148]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.header191:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], [[DBG148]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.cond192:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], [[DBG148]]
-// CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.body193:
-// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], [[DBG148]]
-// CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, [[DBG150:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, [[DBG150]]
-// CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, [[DBG151:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], [[DBG152:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, [[DBG150]]
-// CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, [[DBG153:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    store float [[CONV202]], float* [[TMP11]], align 4, [[DBG154:!dbg !.*]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG149:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG149]]
+// CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG151:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG149]]
+// CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG152:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG153:![0-9]+]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.inc194:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, [[DBG148]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.exit195:
-// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), [[DBG148]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB42]]), [[DBG151]]
-// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB43:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), [[DBG151]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], [[DBG148]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG147]]
 // CHECK-DEBUG:       omp_loop.after196:
-// CHECK-DEBUG-NEXT:    ret void, [[DBG155:!dbg !.*]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG154:![0-9]+]]
 //
 void parallel_for_2(float *r, int a, double b) {
 #pragma omp parallel
diff --git a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
index 496ac07..7ed1ae3 100644
--- a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
@@ -91,9 +91,9 @@
   return 0;
 }
 
-// DEVICE: define weak{{.*}} void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l[[@LINE-7]](i32* nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
-// DEVICE: [[C:%.+]] = load i32, i32* [[C_ADDR]],
-// DEVICE: store i32 [[C]], i32* %
+// DEVICE-DAG: define weak{{.*}} void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l[[@LINE-7]](i32* nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
+// DEVICE-DAG: [[C:%.+]] = load i32, i32* [[C_ADDR]],
+// DEVICE-DAG: store i32 [[C]], i32* %
 
 // HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l[[@LINE-11]](i32* nonnull align {{[0-9]+}} dereferenceable{{.*}})
 // HOST: [[C:%.*]] = load i32, i32* @[[C_ADDR]],
diff --git a/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
index e632451..9f685e8 100644
--- a/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
+++ b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
@@ -6,8 +6,8 @@
 
 // CHECK-DAG: @_Z3barv
 // CHECK-DAG: @_Z3bazv
-// CHECK-DAG: @"_Z53bar$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
-// CHECK-DAG: @"_Z53baz$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
+// CHECK-DAG: define{{.*}} @"_Z53bar$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
+// CHECK-DAG: define{{.*}} @"_Z53baz$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
 // CHECK-DAG: call i32 @"_Z53bar$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"()
 // CHECK-DAG: call i32 @"_Z53baz$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"()
 
diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
index 4f689ebd..7ecfeae 100644
--- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
@@ -60,25 +60,14 @@
 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP1]], i64 0)
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0)
 // CHECK1-NEXT:    ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker
-// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
@@ -122,7 +111,7 @@
 // CHECK1-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK1-NEXT:    br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK1:       .worker:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR3:[0-9]+]]
 // CHECK1-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK1:       .mastercheck:
 // CHECK1-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -150,6 +139,32 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
+// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21
 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  entry:
@@ -185,25 +200,14 @@
 // CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP1]], i32 0)
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i32 0)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker
-// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK2-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 4
 // CHECK2-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
@@ -247,7 +251,7 @@
 // CHECK2-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK2-NEXT:    br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK2:       .worker:
-// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR3:[0-9]+]]
 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK2:       .mastercheck:
 // CHECK2-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -275,6 +279,32 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21
 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
@@ -310,25 +340,14 @@
 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP1]], i32 0)
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i32 0)
 // CHECK3-NEXT:    ret void
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker
-// CHECK3-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 4
 // CHECK3-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
@@ -372,7 +391,7 @@
 // CHECK3-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK3-NEXT:    br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK3:       .worker:
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR4:[0-9]+]]
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR3:[0-9]+]]
 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK3:       .mastercheck:
 // CHECK3-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -399,3 +418,29 @@
 // CHECK3:       .exit:
 // CHECK3-NEXT:    ret void
 //
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT:    call void @_Z4workv() #[[ATTR7]]
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK3-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK3-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
index 473150b..600d71a 100644
--- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
@@ -59,18 +59,11 @@
 // CHECK1-NEXT:    [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*)
 // CHECK1-NEXT:    br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]]
 // CHECK1:       .execute.fn:
-// CHECK1-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]]
+// CHECK1-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]]
 // CHECK1-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
 // CHECK1:       .check.next:
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 8
-// CHECK1-NEXT:    [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*)
-// CHECK1-NEXT:    br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]]
-// CHECK1:       .execute.fn2:
-// CHECK1-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]]
-// CHECK1-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
-// CHECK1:       .check.next3:
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
-// CHECK1-NEXT:    call void [[TMP7]](i16 0, i32 [[TMP4]])
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
+// CHECK1-NEXT:    call void [[TMP6]](i16 0, i32 [[TMP4]])
 // CHECK1-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
 // CHECK1:       .terminate.parallel:
 // CHECK1-NEXT:    call void @__kmpc_kernel_end_parallel()
@@ -96,7 +89,7 @@
 // CHECK1-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK1-NEXT:    br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK1:       .worker:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR5]]
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK1:       .mastercheck:
 // CHECK1-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -121,7 +114,7 @@
 // CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8*
 // CHECK1-NEXT:    store i8* [[TMP8]], i8** [[TMP7]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i64 1)
+// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i64 1)
 // CHECK1-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
 // CHECK1:       .termination.notifier:
 // CHECK1-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
@@ -136,18 +129,51 @@
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
 // CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
-// CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
+// CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2)
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
 // CHECK1-NEXT:    store i8* [[TMP2]], i8** [[TMP1]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP3]], i64 1)
+// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 1)
 // CHECK1-NEXT:    ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
+// CHECK1-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
+// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -187,7 +213,7 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
 // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
@@ -202,40 +228,7 @@
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 8
-// CHECK1-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR5]]
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
-// CHECK1-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
-// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
-// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -265,18 +258,11 @@
 // CHECK2-NEXT:    [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*)
 // CHECK2-NEXT:    br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]]
 // CHECK2:       .execute.fn:
-// CHECK2-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]]
+// CHECK2-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]]
 // CHECK2-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
 // CHECK2:       .check.next:
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 4
-// CHECK2-NEXT:    [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*)
-// CHECK2-NEXT:    br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]]
-// CHECK2:       .execute.fn2:
-// CHECK2-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]]
-// CHECK2-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
-// CHECK2:       .check.next3:
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
-// CHECK2-NEXT:    call void [[TMP7]](i16 0, i32 [[TMP4]])
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
+// CHECK2-NEXT:    call void [[TMP6]](i16 0, i32 [[TMP4]])
 // CHECK2-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
 // CHECK2:       .terminate.parallel:
 // CHECK2-NEXT:    call void @__kmpc_kernel_end_parallel()
@@ -302,7 +288,7 @@
 // CHECK2-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK2-NEXT:    br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK2:       .worker:
-// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR5]]
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK2:       .mastercheck:
 // CHECK2-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -327,7 +313,7 @@
 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8*
 // CHECK2-NEXT:    store i8* [[TMP8]], i8** [[TMP7]], align 4
 // CHECK2-NEXT:    [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i32 1)
+// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i32 1)
 // CHECK2-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
 // CHECK2:       .termination.notifier:
 // CHECK2-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
@@ -342,18 +328,51 @@
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
 // CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
 // CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
-// CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
+// CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2)
 // CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
 // CHECK2-NEXT:    store i8* [[TMP2]], i8** [[TMP1]], align 4
 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP3]], i32 1)
+// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
+// CHECK2-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
+// CHECK2-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -393,7 +412,7 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
 // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
@@ -408,40 +427,7 @@
 // CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4
-// CHECK2-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR5]]
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
-// CHECK2-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
-// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
-// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
-// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -471,18 +457,11 @@
 // CHECK3-NEXT:    [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*)
 // CHECK3-NEXT:    br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]]
 // CHECK3:       .execute.fn:
-// CHECK3-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]]
+// CHECK3-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]]
 // CHECK3-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
 // CHECK3:       .check.next:
-// CHECK3-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 4
-// CHECK3-NEXT:    [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*)
-// CHECK3-NEXT:    br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]]
-// CHECK3:       .execute.fn2:
-// CHECK3-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]]
-// CHECK3-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
-// CHECK3:       .check.next3:
-// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
-// CHECK3-NEXT:    call void [[TMP7]](i16 0, i32 [[TMP4]])
+// CHECK3-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
+// CHECK3-NEXT:    call void [[TMP6]](i16 0, i32 [[TMP4]])
 // CHECK3-NEXT:    br label [[DOTTERMINATE_PARALLEL]]
 // CHECK3:       .terminate.parallel:
 // CHECK3-NEXT:    call void @__kmpc_kernel_end_parallel()
@@ -508,7 +487,7 @@
 // CHECK3-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
 // CHECK3-NEXT:    br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
 // CHECK3:       .worker:
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR5]]
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
 // CHECK3:       .mastercheck:
 // CHECK3-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -533,7 +512,7 @@
 // CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8*
 // CHECK3-NEXT:    store i8* [[TMP8]], i8** [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i32 1)
+// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i32 1)
 // CHECK3-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
 // CHECK3:       .termination.notifier:
 // CHECK3-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
@@ -548,18 +527,51 @@
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
 // CHECK3-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
 // CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
-// CHECK3-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
+// CHECK3-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2)
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
 // CHECK3-NEXT:    store i8* [[TMP2]], i8** [[TMP1]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP3]], i32 1)
+// CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1)
 // CHECK3-NEXT:    ret void
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
+// CHECK3-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK3-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
+// CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
+// CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK3-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
+// CHECK3-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]]
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -599,7 +611,7 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
 // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
@@ -614,39 +626,6 @@
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4
-// CHECK3-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR5]]
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
-// CHECK3-NEXT:    call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]]
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK3-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
-// CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
-// CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK3-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
-// CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
 // CHECK3-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 89b5b55..d809beb 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -763,51 +763,6 @@
 // CHECK1-NEXT:    ret i32 [[TMP20]]
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
-// CHECK1-NEXT:    [[TMP:%.*]] = alloca double*, align 8
-// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 8
-// CHECK1-NEXT:    store double* [[A]], double** [[A_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8
-// CHECK1-NEXT:    store double* [[TMP1]], double** [[TMP]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
-// CHECK1-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
-// CHECK1-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
-// CHECK1-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
-// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
-// CHECK1-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8
-// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker
 // CHECK1-SAME: () #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
@@ -976,6 +931,51 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+// CHECK1-NEXT:    [[TMP:%.*]] = alloca double*, align 8
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 8
+// CHECK1-NEXT:    store double* [[A]], double** [[A_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8
+// CHECK1-NEXT:    store double* [[TMP1]], double** [[TMP]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
+// CHECK1-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
+// CHECK1-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
+// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1
+// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+// CHECK1-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8
+// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25
 // CHECK2-SAME: (i32* [[PTR1:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  entry:
@@ -1592,51 +1592,6 @@
 // CHECK2-NEXT:    ret i32 [[TMP20]]
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 4
-// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 4
-// CHECK2-NEXT:    [[TMP:%.*]] = alloca double*, align 4
-// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 4
-// CHECK2-NEXT:    store double* [[A]], double** [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4
-// CHECK2-NEXT:    store double* [[TMP1]], double** [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 4
-// CHECK2-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
-// CHECK2-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
-// CHECK2-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
-// CHECK2-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
-// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
-// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
-// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
-// CHECK2-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4
-// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker
 // CHECK2-SAME: () #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
@@ -1804,6 +1759,51 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca double*, align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 4
+// CHECK2-NEXT:    store double* [[A]], double** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4
+// CHECK2-NEXT:    store double* [[TMP1]], double** [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
+// CHECK2-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
+// CHECK2-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
+// CHECK2-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1
+// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+// CHECK2-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4
+// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25
 // CHECK3-SAME: (i32* [[PTR1:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
@@ -2420,51 +2420,6 @@
 // CHECK3-NEXT:    ret i32 [[TMP20]]
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 4
-// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 4
-// CHECK3-NEXT:    [[TMP:%.*]] = alloca double*, align 4
-// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 4
-// CHECK3-NEXT:    store double* [[A]], double** [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4
-// CHECK3-NEXT:    store double* [[TMP1]], double** [[TMP]], align 4
-// CHECK3-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 4
-// CHECK3-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
-// CHECK3-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
-// CHECK3-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
-// CHECK3-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
-// CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-// CHECK3-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
-// CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
-// CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
-// CHECK3-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
-// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1
-// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
-// CHECK3-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4
-// CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker
 // CHECK3-SAME: () #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
@@ -2631,3 +2586,48 @@
 // CHECK3:       .exit:
 // CHECK3-NEXT:    ret void
 //
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[F_ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-NEXT:    [[TMP:%.*]] = alloca double*, align 4
+// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT:    store i32* [[F]], i32** [[F_ADDR]], align 4
+// CHECK3-NEXT:    store double* [[A]], double** [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4
+// CHECK3-NEXT:    store double* [[TMP1]], double** [[TMP]], align 4
+// CHECK3-NEXT:    [[TMP2:%.*]] = load double*, double** [[TMP]], align 4
+// CHECK3-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8
+// CHECK3-NEXT:    [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
+// CHECK3-NEXT:    [[CONV:%.*]] = fptosi double [[ADD]] to i32
+// CHECK3-NEXT:    store i32 [[CONV]], i32* [[TMP0]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
+// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
+// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
+// CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
+// CHECK3-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK3-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
+// CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
+// CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
+// CHECK3-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
+// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1
+// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+// CHECK3-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4
+// CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2]]
+// CHECK3-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp
index f42aa69..9abcd21 100644
--- a/clang/test/OpenMP/parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_codegen.cpp
@@ -2468,15 +2468,8 @@
 // CHECK2-NEXT:    unreachable
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_Z3foov
-// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    call void @_Z8mayThrowv()
-// CHECK3-NEXT:    ret i32 0
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_Z17with_var_schedulev
-// CHECK3-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
@@ -2494,7 +2487,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2603,7 +2596,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z23without_schedule_clausePfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -2618,7 +2611,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2712,7 +2705,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -2727,7 +2720,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2821,7 +2814,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z14static_chunkedPfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -2836,7 +2829,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2947,7 +2940,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -2962,7 +2955,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3049,7 +3042,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z7guided7PfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -3064,7 +3057,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3151,7 +3144,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -3170,7 +3163,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[Y:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[Y:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3308,7 +3301,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_
-// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
@@ -3325,7 +3318,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3426,8 +3419,15 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK3-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @_Z8mayThrowv()
+// CHECK3-NEXT:    ret i32 0
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@_Z12parallel_forPfi
-// CHECK3-SAME: (float* [[A:%.*]], i32 [[N:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (float* [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
@@ -3453,7 +3453,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], i64 [[VLA:%.*]], i64 [[N:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], i64 [[VLA:%.*]], i64 [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3577,7 +3577,7 @@
 //
 // CHECK3-LABEL: define {{[^@]+}}@__clang_call_terminate
 // CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] comdat {
-// CHECK3-NEXT:    [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR4:[0-9]+]]
+// CHECK3-NEXT:    [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    call void @_ZSt9terminatev() #[[ATTR7]]
 // CHECK3-NEXT:    unreachable
 //
diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
index d2e9a65..7cf64d2 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
@@ -50,28 +50,8 @@
 
 
 #endif
-// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK1-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ei
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
-// CHECK1-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@main
-// CHECK1-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
@@ -131,7 +111,7 @@
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
@@ -141,7 +121,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -185,7 +165,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -261,7 +241,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -301,7 +281,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -377,7 +357,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR3]] {
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -474,7 +454,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..7
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -635,7 +615,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -672,7 +652,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -741,7 +721,7 @@
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51
 // CHECK1-NEXT:    store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !51
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
-// CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR4]]
+// CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK1-NEXT:    br i1 [[TMP27]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK1:       .cancel.exit.i:
@@ -749,7 +729,7 @@
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__9_EXIT:%.*]]
 // CHECK1:       .cancel.continue.i:
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
-// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR4]]
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK1-NEXT:    br i1 [[TMP30]], label [[DOTCANCEL_EXIT2_I:%.*]], label [[DOTCANCEL_CONTINUE3_I:%.*]]
 // CHECK1:       .cancel.exit2.i:
@@ -768,8 +748,28 @@
 // CHECK1-NEXT:    ret i32 0
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK1-SAME: () #[[ATTR7:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ei
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8:[0-9]+]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
+// CHECK1-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ei
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8]] align 2 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
@@ -793,7 +793,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -864,7 +864,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..13
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -970,34 +970,14 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_parallel_master_taskloop_codegen.cpp
-// CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK1-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    call void @__cxx_global_var_init()
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ei
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
-// CHECK2-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@main
-// CHECK2-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK2-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
@@ -1057,7 +1037,7 @@
 // CHECK2:       omp_if.else:
 // CHECK2-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
 // CHECK2-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
@@ -1067,7 +1047,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1111,7 +1091,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1187,7 +1167,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1227,7 +1207,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1303,7 +1283,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR3]] {
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1400,7 +1380,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..7
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1561,7 +1541,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1598,7 +1578,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1667,7 +1647,7 @@
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51
 // CHECK2-NEXT:    store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !51
 // CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
-// CHECK2-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR4]]
+// CHECK2-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR2]]
 // CHECK2-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK2-NEXT:    br i1 [[TMP27]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK2:       .cancel.exit.i:
@@ -1675,7 +1655,7 @@
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__9_EXIT:%.*]]
 // CHECK2:       .cancel.continue.i:
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
-// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR4]]
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR2]]
 // CHECK2-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK2-NEXT:    br i1 [[TMP30]], label [[DOTCANCEL_EXIT2_I:%.*]], label [[DOTCANCEL_CONTINUE3_I:%.*]]
 // CHECK2:       .cancel.exit2.i:
@@ -1694,8 +1674,28 @@
 // CHECK2-NEXT:    ret i32 0
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK2-SAME: () #[[ATTR7:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ei
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8:[0-9]+]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
+// CHECK2-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@_ZN1SC2Ei
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8]] align 2 {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
@@ -1719,7 +1719,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
-// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1790,7 +1790,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..13
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1896,7 +1896,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_parallel_master_taskloop_codegen.cpp
-// CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK2-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @__cxx_global_var_init()
 // CHECK2-NEXT:    ret void
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
index 01a417d..a29e8dd 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
@@ -1743,28 +1743,8 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK3-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SC1Ei
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
-// CHECK3-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@main
-// CHECK3-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
@@ -1824,7 +1804,7 @@
 // CHECK3:       omp_if.else:
 // CHECK3-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
+// CHECK3-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
@@ -1833,7 +1813,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1877,7 +1857,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1953,7 +1933,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1993,7 +1973,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2069,7 +2049,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2177,7 +2157,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK3-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -2191,7 +2171,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..7
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2258,7 +2238,7 @@
 // CHECK3-NEXT:    [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46
 // CHECK3-NEXT:    [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)*
-// CHECK3-NEXT:    call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
+// CHECK3-NEXT:    call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46
@@ -2418,7 +2398,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_dup.
-// CHECK3-SAME: (%struct.kmp_task_t_with_privates.3* [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR6]] {
+// CHECK3-SAME: (%struct.kmp_task_t_with_privates.3* [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR4]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8
@@ -2435,8 +2415,28 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK3-SAME: () #[[ATTR6]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SC1Ei
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8:[0-9]+]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
+// CHECK3-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@_ZN1SC2Ei
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8]] align 2 {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
@@ -2460,7 +2460,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2531,7 +2531,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2637,34 +2637,14 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_parallel_master_taskloop_simd_codegen.cpp
-// CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK3-SAME: () #[[ATTR6]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    call void @__cxx_global_var_init()
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK4-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SC1Ei
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
-// CHECK4-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
-// CHECK4-NEXT:    ret void
-//
-//
 // CHECK4-LABEL: define {{[^@]+}}@main
-// CHECK4-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK4-SAME: (i32 [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
@@ -2724,7 +2704,7 @@
 // CHECK4:       omp_if.else:
 // CHECK4-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
-// CHECK4-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
+// CHECK4-NEXT:    call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]]
 // CHECK4-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
@@ -2733,7 +2713,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2777,7 +2757,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2853,7 +2833,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2893,7 +2873,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2969,7 +2949,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR3]] {
+// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]], i64 [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3077,7 +3057,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK4-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -3091,7 +3071,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..7
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3158,7 +3138,7 @@
 // CHECK4-NEXT:    [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46
 // CHECK4-NEXT:    [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46
 // CHECK4-NEXT:    [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)*
-// CHECK4-NEXT:    call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
+// CHECK4-NEXT:    call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
 // CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8
 // CHECK4-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46
@@ -3318,7 +3298,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_dup.
-// CHECK4-SAME: (%struct.kmp_task_t_with_privates.3* [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR6]] {
+// CHECK4-SAME: (%struct.kmp_task_t_with_privates.3* [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR4]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8
 // CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8
@@ -3335,8 +3315,28 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK4-SAME: () #[[ATTR6]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    call void @_ZN1SC1Ei(%struct.S* nonnull dereferenceable(4) @s, i32 1)
+// CHECK4-NEXT:    ret void
+//
+//
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SC1Ei
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8:[0-9]+]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    store i32 [[C]], i32* [[C_ADDR]], align 4
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
+// CHECK4-NEXT:    call void @_ZN1SC2Ei(%struct.S* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@_ZN1SC2Ei
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[C:%.*]]) unnamed_addr #[[ATTR8]] align 2 {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
@@ -3360,7 +3360,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
+// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3431,7 +3431,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR6]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3537,7 +3537,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_parallel_master_taskloop_simd_codegen.cpp
-// CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK4-SAME: () #[[ATTR6]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    call void @__cxx_global_var_init()
 // CHECK4-NEXT:    ret void
diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
index 666421d..8a8b556 100644
--- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
+++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
@@ -4,20 +4,18 @@
 
 // host-no-diagnostics
 
-void bar1(void) {    // all-remark {{[OMP100] Potentially unknown OpenMP target region caller}}
+void bar1(void) {
 #pragma omp parallel // #0
                      // all-remark@#0 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // safe-remark@#0 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
-                     // force-remark@#0 {{[UNSAFE] Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will rewrite the state machine use due to command line flag, this can lead to undefined behavior if the parallel region is called from a target region outside this translation unit.}}
+                     // safe-remark@#0 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}}
                      // force-remark@#0 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: <NONE>}}
   {
   }
 }
-void bar2(void) {    // all-remark {{[OMP100] Potentially unknown OpenMP target region caller}}
+void bar2(void) {
 #pragma omp parallel // #1
                      // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // safe-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
-                     // force-remark@#1 {{[UNSAFE] Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will rewrite the state machine use due to command line flag, this can lead to undefined behavior if the parallel region is called from a target region outside this translation unit.}}
+                     // safe-remark@#1 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}}
                      // force-remark@#1 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__6_wrapper, kernel ID: <NONE>}}
   {
   }
@@ -26,7 +24,7 @@
 void foo1(void) {
 #pragma omp target teams // #2
                          // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}}
-                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}}
   {
 #pragma omp parallel // #3
                      // all-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
@@ -36,7 +34,7 @@
     bar1();
 #pragma omp parallel // #4
                      // all-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // all-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+                     // all-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}}
     {
     }
   }
@@ -45,18 +43,18 @@
 void foo2(void) {
 #pragma omp target teams // #5
                          // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}}
-                         // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__4_wrapper, kernel ID: __omp_offloading}}
   {
 #pragma omp parallel // #6
                      // all-remark@#6 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // all-remark@#6 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}}
+                     // all-remark@#6 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__4_wrapper, kernel ID: __omp_offloading}}
     {
     }
     bar1();
     bar2();
 #pragma omp parallel // #7
                      // all-remark@#7 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // all-remark@#7 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
+                     // all-remark@#7 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}}
     {
     }
     bar1();
@@ -66,19 +64,19 @@
 
 void foo3(void) {
 #pragma omp target teams // #8
-                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__9_wrapper, kernel ID: __omp_offloading}}
-                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__10_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
+                         // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__8_wrapper, kernel ID: __omp_offloading}}
   {
 #pragma omp parallel // #9
                      // all-remark@#9 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // all-remark@#9 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__9_wrapper, kernel ID: __omp_offloading}}
+                     // all-remark@#9 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}}
     {
     }
     bar1();
     bar2();
 #pragma omp parallel // #10
                      // all-remark@#10 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}}
-                     // all-remark@#10 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__10_wrapper, kernel ID: __omp_offloading}}
+                     // all-remark@#10 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__8_wrapper, kernel ID: __omp_offloading}}
     {
     }
     bar1();
diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
index 117d373..3d0d527 100644
--- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
+++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
@@ -4,10 +4,10 @@
 
 // host-no-diagnostics
 
-void bar(void) {     // expected-remark {{[OMP100] Potentially unknown OpenMP target region caller}}
+void bar(void) {
 #pragma omp parallel // #1                                                                                                                                                                                                                                                                                                                                           \
                      // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
-                     // expected-remark@#1 {{Parallel region is not known to be called from a unique single target region, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
+                     // expected-remark@#1 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}}
   {
   }
 }
@@ -15,7 +15,7 @@
 void foo(void) {
 #pragma omp target teams // #2                                                                                                                                                                      \
                          // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} \
-                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+                         // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}}
   {
 #pragma omp parallel // #3                                                                                                                                                                                                                                                                                                                                           \
                      // expected-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
@@ -25,7 +25,7 @@
     bar();
 #pragma omp parallel // #4                                                                                                                                                                                                                                                                                                                                           \
                      // expected-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \
-                     // expected-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__3_wrapper, kernel ID: __omp_offloading}}
+                     // expected-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}}
     {
     }
   }
diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp
index d54c6d0..863c4268 100644
--- a/clang/test/OpenMP/sections_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp
@@ -175,6 +175,28 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -213,6 +235,23 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -650,45 +689,6 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_sections_firstprivate_codegen.cpp
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -726,6 +726,28 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -764,6 +786,23 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1201,45 +1240,6 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_sections_firstprivate_codegen.cpp
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1277,6 +1277,28 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1315,6 +1337,23 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1406,45 +1445,6 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_sections_firstprivate_codegen.cpp
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1482,6 +1482,28 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1520,6 +1542,23 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1673,45 +1712,6 @@
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
 // CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_sections_firstprivate_codegen.cpp
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp
index 4f1e3c2..2b3e71e 100644
--- a/clang/test/OpenMP/single_codegen.cpp
+++ b/clang/test/OpenMP/single_codegen.cpp
@@ -143,12 +143,16 @@
 
 
 
-// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT:    ret void
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
+// CHECK1-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]])
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP3]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
@@ -161,6 +165,17 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
+// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
 // CHECK1-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
 // CHECK1-NEXT:  entry:
@@ -171,7 +186,98 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
+// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
+// CHECK1-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
+// CHECK1-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
+// CHECK1:       arrayctor.loop:
+// CHECK1-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
+// CHECK1-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
+// CHECK1-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
+// CHECK1-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
+// CHECK1-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
+// CHECK1:       arrayctor.cont:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP3]]
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
+// CHECK1-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
+// CHECK1-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done1:
+// CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK1:       eh.resume:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK1-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
+// CHECK1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done1:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc)
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
 // CHECK1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
@@ -229,112 +335,6 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
-// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
-// CHECK1-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]])
-// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    ret i8* [[TMP3]]
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
-// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
-// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3]]
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
-// CHECK1-SAME: () #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..2
-// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
-// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*
-// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0
-// CHECK1-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1:       arrayctor.loop:
-// CHECK1-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
-// CHECK1-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
-// CHECK1:       invoke.cont:
-// CHECK1-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1:       arrayctor.cont:
-// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    ret i8* [[TMP3]]
-// CHECK1:       lpad:
-// CHECK1-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
-// CHECK1-NEXT:    cleanup
-// CHECK1-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
-// CHECK1-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
-// CHECK1-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
-// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]]
-// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1:       arraydestroy.body:
-// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
-// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
-// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
-// CHECK1:       arraydestroy.done1:
-// CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
-// CHECK1:       eh.resume:
-// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
-// CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
-// CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK1-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]]
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..3
-// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
-// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1:       arraydestroy.body:
-// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
-// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
-// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1:       arraydestroy.done1:
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..4
-// CHECK1-SAME: () #[[ATTR0]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..2, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..3)
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
 // CHECK1-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
 // CHECK1-NEXT:  entry:
@@ -1146,9 +1146,9 @@
 // CHECK1-SAME: () #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    call void @__cxx_global_var_init()
-// CHECK1-NEXT:    call void @__cxx_global_var_init.1()
+// CHECK1-NEXT:    call void @__cxx_global_var_init.4()
 // CHECK1-NEXT:    call void @.__omp_threadprivate_init_.()
-// CHECK1-NEXT:    call void @.__omp_threadprivate_init_..4()
+// CHECK1-NEXT:    call void @.__omp_threadprivate_init_..3()
 // CHECK1-NEXT:    ret void
 //
 //
@@ -2161,18 +2161,8 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK3-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
-// CHECK3-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc)
-// CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
-// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
 // CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
@@ -2200,7 +2190,7 @@
 // CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
 // CHECK3-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*
-// CHECK3-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3]]
+// CHECK3-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -2214,51 +2204,15 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
-// CHECK3-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK3-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
+// CHECK3-SAME: () #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
-// CHECK3-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..2, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..3)
-// CHECK3-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3:       arrayctor.loop:
-// CHECK3-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
-// CHECK3-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
-// CHECK3:       invoke.cont:
-// CHECK3-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
-// CHECK3-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
-// CHECK3-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3:       arrayctor.cont:
-// CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
 // CHECK3-NEXT:    ret void
-// CHECK3:       lpad:
-// CHECK3-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
-// CHECK3-NEXT:    cleanup
-// CHECK3-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-// CHECK3-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
-// CHECK3-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
-// CHECK3-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
-// CHECK3-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
-// CHECK3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3:       arraydestroy.body:
-// CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK3-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
-// CHECK3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
-// CHECK3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
-// CHECK3:       arraydestroy.done1:
-// CHECK3-NEXT:    br label [[EH_RESUME:%.*]]
-// CHECK3:       eh.resume:
-// CHECK3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
-// CHECK3-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
-// CHECK3-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK3-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK3-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]]
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..2
+// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
 // CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -2306,7 +2260,7 @@
 // CHECK3-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]]
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..3
+// CHECK3-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
 // CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -2325,6 +2279,64 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
+// CHECK3-SAME: () #[[ATTR0]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2)
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK3-SAME: () #[[ATTR0]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc)
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
+// CHECK3-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK3-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
+// CHECK3:       arrayctor.loop:
+// CHECK3-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ]
+// CHECK3-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
+// CHECK3-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]]
+// CHECK3:       invoke.cont:
+// CHECK3-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1
+// CHECK3-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2)
+// CHECK3-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
+// CHECK3:       arrayctor.cont:
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// CHECK3-NEXT:    ret void
+// CHECK3:       lpad:
+// CHECK3-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK3-NEXT:    cleanup
+// CHECK3-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// CHECK3-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// CHECK3-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK3-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]]
+// CHECK3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK3:       arraydestroy.body:
+// CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK3-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0)
+// CHECK3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]]
+// CHECK3:       arraydestroy.done1:
+// CHECK3-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK3:       eh.resume:
+// CHECK3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK3-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK3-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK3-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK3-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]]
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
 // CHECK3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
@@ -2341,13 +2353,6 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_Z3foov
-// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    call void @_Z8mayThrowv()
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_ZN9TestClassC2Ev
 // CHECK3-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
 // CHECK3-NEXT:  entry:
@@ -2368,6 +2373,13 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @_Z8mayThrowv()
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@main
 // CHECK3-SAME: () #[[ATTR6:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK3-NEXT:  entry:
@@ -2629,7 +2641,7 @@
 // CHECK3-NEXT:    store i8* [[TMP14]], i8** [[TMP12]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
-// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.4, i32 [[TMP16]])
+// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP16]])
 // CHECK3-NEXT:    ret void
 // CHECK3:       terminate.handler:
 // CHECK3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
@@ -2656,7 +2668,7 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.4
+// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.5
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -2698,11 +2710,11 @@
 // CHECK3-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*
 // CHECK3-NEXT:    store double [[TMP7]], double* [[CONV]], align 8
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR12]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2743,11 +2755,11 @@
 // CHECK3-NEXT:    store i8* [[TMP10]], i8** [[TMP8]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
-// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.6, i32 [[TMP12]])
+// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.6
+// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.7
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -2817,11 +2829,11 @@
 // CHECK3-NEXT:    [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP8]], i32* [[CONV10]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8
 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2901,7 +2913,7 @@
 // CHECK3-NEXT:    store i8* [[TMP23]], i8** [[TMP21]], align 8
 // CHECK3-NEXT:    [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
-// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.8, i32 [[TMP25]])
+// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP25]])
 // CHECK3-NEXT:    ret void
 // CHECK3:       terminate.handler:
 // CHECK3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
@@ -2953,11 +2965,11 @@
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP21]], i32* [[CONV3]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.8
+// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -2995,7 +3007,7 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR12]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3061,11 +3073,11 @@
 // CHECK3-NEXT:    store i8* [[TMP19]], i8** [[TMP17]], align 8
 // CHECK3-NEXT:    [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4
-// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.10, i32 [[TMP21]])
+// CHECK3-NEXT:    call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.10
+// CHECK3-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR9]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
@@ -3106,11 +3118,11 @@
 // CHECK3-LABEL: define {{[^@]+}}@_Z15parallel_singlev
 // CHECK3-SAME: () #[[ATTR10]] {
 // CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*))
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*))
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3152,7 +3164,9 @@
 // CHECK3-SAME: () #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    call void @__cxx_global_var_init()
-// CHECK3-NEXT:    call void @__cxx_global_var_init.1()
+// CHECK3-NEXT:    call void @__cxx_global_var_init.4()
+// CHECK3-NEXT:    call void @.__omp_threadprivate_init_.()
+// CHECK3-NEXT:    call void @.__omp_threadprivate_init_..3()
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4151,195 +4165,195 @@
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK5-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG6:![0-9]+]] {
+// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG6:![0-9]+]] {
 // CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc), !dbg [[DBG8:![0-9]+]]
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG11:![0-9]+]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG8]]
+// CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9:![0-9]+]]
+// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG9]]
+// CHECK5-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]), !dbg [[DBG10:![0-9]+]]
+// CHECK5-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9]]
+// CHECK5-NEXT:    ret i8* [[TMP3]], !dbg [[DBG9]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev
-// CHECK5-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG12:![0-9]+]] {
+// CHECK5-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG11:![0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
 // CHECK5-NEXT:    store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
-// CHECK5-NEXT:    call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull dereferenceable(4) [[THIS1]]), !dbg [[DBG13:![0-9]+]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG14:![0-9]+]]
-//
-//
-// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
-// CHECK5-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG15:![0-9]+]] {
-// CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
-// CHECK5-NEXT:    store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
-// CHECK5-NEXT:    [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
-// CHECK5-NEXT:    call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG16:![0-9]+]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG17:![0-9]+]]
-//
-//
-// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
-// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG18:![0-9]+]] {
-// CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
-// CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG19:![0-9]+]]
-// CHECK5:       arrayctor.loop:
-// CHECK5-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG19]]
-// CHECK5-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK5-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG19]]
-// CHECK5:       invoke.cont:
-// CHECK5-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG19]]
-// CHECK5-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), !dbg [[DBG19]]
-// CHECK5-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG19]]
-// CHECK5:       arrayctor.cont:
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG21:![0-9]+]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG21]]
-// CHECK5:       lpad:
-// CHECK5-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
-// CHECK5-NEXT:    cleanup, !dbg [[DBG22:![0-9]+]]
-// CHECK5-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG22]]
-// CHECK5-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG22]]
-// CHECK5-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG22]]
-// CHECK5-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG22]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]], !dbg [[DBG19]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG19]]
-// CHECK5:       arraydestroy.body:
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG19]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG19]]
-// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG19]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG19]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG19]]
-// CHECK5:       arraydestroy.done1:
-// CHECK5-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG19]]
-// CHECK5:       eh.resume:
-// CHECK5-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG19]]
-// CHECK5-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG19]]
-// CHECK5-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG19]]
-// CHECK5-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG19]]
-// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG19]]
-//
-//
-// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG23:![0-9]+]] {
-// CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK5-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG24:![0-9]+]]
-// CHECK5:       arraydestroy.body:
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG24]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG24]]
-// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG24]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG24]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG24]]
-// CHECK5:       arraydestroy.done1:
-// CHECK5-NEXT:    ret void, !dbg [[DBG24]]
-//
-//
-// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG25:![0-9]+]] {
-// CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
-// CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG26:![0-9]+]]
-// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG26]]
-// CHECK5-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]), !dbg [[DBG27:![0-9]+]]
-// CHECK5-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG26]]
-// CHECK5-NEXT:    ret i8* [[TMP3]], !dbg [[DBG26]]
+// CHECK5-NEXT:    call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull dereferenceable(4) [[THIS1]]), !dbg [[DBG12:![0-9]+]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG13:![0-9]+]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG28:![0-9]+]] {
+// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG14:![0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
 // CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG29:![0-9]+]]
-// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG29]]
-// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3]], !dbg [[DBG29]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG30:![0-9]+]]
+// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG15:![0-9]+]]
+// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG15]]
+// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG15]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG16:![0-9]+]]
+//
+//
+// CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev
+// CHECK5-SAME: (%class.TestClass* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG17:![0-9]+]] {
+// CHECK5-NEXT:  entry:
+// CHECK5-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8
+// CHECK5-NEXT:    store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8
+// CHECK5-NEXT:    [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8
+// CHECK5-NEXT:    call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG18:![0-9]+]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG19:![0-9]+]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
-// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG31:![0-9]+]] {
+// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG20:![0-9]+]] {
 // CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG32:![0-9]+]]
-// CHECK5-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG32]]
-// CHECK5-NEXT:    ret void, !dbg [[DBG32]]
+// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG21:![0-9]+]]
+// CHECK5-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG21]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG21]]
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..2
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG33:![0-9]+]] {
+// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
+// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG22:![0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
 // CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
 // CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG34:![0-9]+]]
-// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG34]]
-// CHECK5-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG35:![0-9]+]]
-// CHECK5-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG35]]
-// CHECK5-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG35]]
+// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23:![0-9]+]]
+// CHECK5-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG23]]
+// CHECK5-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG24:![0-9]+]]
+// CHECK5-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG24]]
+// CHECK5-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG24]]
 // CHECK5:       arrayctor.loop:
-// CHECK5-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG35]]
+// CHECK5-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG24]]
 // CHECK5-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK5-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG35]]
+// CHECK5-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG24]]
 // CHECK5:       invoke.cont:
-// CHECK5-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG35]]
-// CHECK5-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG35]]
-// CHECK5-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG35]]
+// CHECK5-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG24]]
+// CHECK5-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG24]]
+// CHECK5-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG24]]
 // CHECK5:       arrayctor.cont:
-// CHECK5-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG34]]
-// CHECK5-NEXT:    ret i8* [[TMP3]], !dbg [[DBG34]]
+// CHECK5-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23]]
+// CHECK5-NEXT:    ret i8* [[TMP3]], !dbg [[DBG23]]
 // CHECK5:       lpad:
 // CHECK5-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
-// CHECK5-NEXT:    cleanup, !dbg [[DBG36:![0-9]+]]
-// CHECK5-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG36]]
-// CHECK5-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG36]]
-// CHECK5-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG36]]
-// CHECK5-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG36]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG35]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG35]]
+// CHECK5-NEXT:    cleanup, !dbg [[DBG25:![0-9]+]]
+// CHECK5-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG25]]
+// CHECK5-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]]
+// CHECK5-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG25]]
+// CHECK5-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG24]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG24]]
 // CHECK5:       arraydestroy.body:
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG35]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG35]]
-// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG35]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG35]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG35]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG24]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG24]]
+// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG24]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG24]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG24]]
 // CHECK5:       arraydestroy.done1:
-// CHECK5-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG35]]
+// CHECK5-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG24]]
 // CHECK5:       eh.resume:
-// CHECK5-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG35]]
-// CHECK5-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG35]]
-// CHECK5-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG35]]
-// CHECK5-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG35]]
-// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG35]]
+// CHECK5-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG24]]
+// CHECK5-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG24]]
+// CHECK5-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG24]]
+// CHECK5-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG24]]
+// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG24]]
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..3
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG37:![0-9]+]] {
+// CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
+// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG26:![0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
 // CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
-// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG38:![0-9]+]]
-// CHECK5-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG38]]
-// CHECK5-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG38]]
-// CHECK5-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG38]]
+// CHECK5-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG27:![0-9]+]]
+// CHECK5-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG27]]
+// CHECK5-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG27]]
+// CHECK5-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG27]]
 // CHECK5:       arraydestroy.body:
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG38]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG38]]
-// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG38]]
-// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG38]]
-// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG38]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG27]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG27]]
+// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG27]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG27]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG27]]
 // CHECK5:       arraydestroy.done1:
-// CHECK5-NEXT:    ret void, !dbg [[DBG39:![0-9]+]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG28:![0-9]+]]
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..4
-// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG40:![0-9]+]] {
+// CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
+// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG29:![0-9]+]] {
 // CHECK5-NEXT:  entry:
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG41:![0-9]+]]
-// CHECK5-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..2, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..3), !dbg [[DBG41]]
+// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG30:![0-9]+]]
+// CHECK5-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG30]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG30]]
+//
+//
+// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG31:![0-9]+]] {
+// CHECK5-NEXT:  entry:
+// CHECK5-NEXT:    call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) @tc), !dbg [[DBG32:![0-9]+]]
+// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG34:![0-9]+]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG32]]
+//
+//
+// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
+// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG35:![0-9]+]] {
+// CHECK5-NEXT:  entry:
+// CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG36:![0-9]+]]
+// CHECK5:       arrayctor.loop:
+// CHECK5-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG36]]
+// CHECK5-NEXT:    invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYCTOR_CUR]])
+// CHECK5-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG36]]
+// CHECK5:       invoke.cont:
+// CHECK5-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG36]]
+// CHECK5-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], getelementptr inbounds ([[CLASS_TESTCLASS]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), !dbg [[DBG36]]
+// CHECK5-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG36]]
+// CHECK5:       arrayctor.cont:
+// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG38:![0-9]+]]
+// CHECK5-NEXT:    ret void, !dbg [[DBG38]]
+// CHECK5:       lpad:
+// CHECK5-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK5-NEXT:    cleanup, !dbg [[DBG39:![0-9]+]]
+// CHECK5-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG39]]
+// CHECK5-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG39]]
+// CHECK5-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG39]]
+// CHECK5-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG39]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), [[ARRAYCTOR_CUR]], !dbg [[DBG36]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG36]]
+// CHECK5:       arraydestroy.body:
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG36]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG36]]
+// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG36]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG36]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG36]]
+// CHECK5:       arraydestroy.done1:
+// CHECK5-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG36]]
+// CHECK5:       eh.resume:
+// CHECK5-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG36]]
+// CHECK5-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG36]]
+// CHECK5-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG36]]
+// CHECK5-NEXT:    [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG36]]
+// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG36]]
+//
+//
+// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG40:![0-9]+]] {
+// CHECK5-NEXT:  entry:
+// CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK5-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG41:![0-9]+]]
+// CHECK5:       arraydestroy.body:
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ getelementptr inbounds ([[CLASS_TESTCLASS:%.*]], %class.TestClass* getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG41]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG41]]
+// CHECK5-NEXT:    call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG41]]
+// CHECK5-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %class.TestClass], [2 x %class.TestClass]* @tc2, i32 0, i32 0), !dbg [[DBG41]]
+// CHECK5-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG41]]
+// CHECK5:       arraydestroy.done1:
 // CHECK5-NEXT:    ret void, !dbg [[DBG41]]
 //
 //
@@ -5154,9 +5168,9 @@
 // CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG186:![0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG187:![0-9]+]]
-// CHECK5-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG187]]
+// CHECK5-NEXT:    call void @__cxx_global_var_init.4(), !dbg [[DBG187]]
 // CHECK5-NEXT:    call void @.__omp_threadprivate_init_.(), !dbg [[DBG187]]
-// CHECK5-NEXT:    call void @.__omp_threadprivate_init_..4(), !dbg [[DBG187]]
+// CHECK5-NEXT:    call void @.__omp_threadprivate_init_..3(), !dbg [[DBG187]]
 // CHECK5-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp
index 442ed09..e0d562f 100644
--- a/clang/test/OpenMP/single_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp
@@ -161,6 +161,28 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -199,6 +221,23 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -567,45 +606,6 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK1-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK1-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_firstprivate_codegen.cpp
 // CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
@@ -643,6 +643,28 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -681,6 +703,23 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1049,45 +1088,6 @@
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK2-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK2-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_firstprivate_codegen.cpp
 // CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
@@ -1125,6 +1125,28 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1163,6 +1185,23 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1220,45 +1259,6 @@
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK3-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK3-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_firstprivate_codegen.cpp
 // CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
@@ -1296,6 +1296,28 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1334,6 +1356,23 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
+// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
@@ -1453,45 +1492,6 @@
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-// CHECK4-NEXT:    store float [[CONV]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]], float [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
-// CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    store float [[A]], float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4
-// CHECK4-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* @g, align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
-// CHECK4-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]]
-// CHECK4-NEXT:    store float [[ADD]], float* [[F]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
 // CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_firstprivate_codegen.cpp
 // CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp
index 36001ee..7834112 100644
--- a/clang/test/OpenMP/task_codegen.cpp
+++ b/clang/test/OpenMP/task_codegen.cpp
@@ -151,25 +151,8 @@
 // Copy firstprivate value of `b`.
 #endif // UNTIEDRT
 #endif
-// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK1-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
-// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
-// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
-// CHECK1-NEXT:    ret void
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@main
-// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[B:%.*]] = alloca i8, align 1
@@ -460,7 +443,7 @@
 // CHECK1:       arraydestroy.body:
 // CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP170]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]]
+// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]]
 // CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN32]]
 // CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE33:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK1:       arraydestroy.done33:
@@ -469,7 +452,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
@@ -479,7 +462,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -525,7 +508,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..2
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -566,7 +549,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -612,13 +595,13 @@
 // CHECK1-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
 // CHECK1:       .untied.jmp.1.i:
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
-// CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK1-NEXT:    store i32 1, i32* @a, align 4
-// CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK1-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31
 // CHECK1-NEXT:    br label [[CLEANUP_I]]
 // CHECK1:       cleanup.i:
@@ -629,7 +612,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..6
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -675,7 +658,7 @@
 // CHECK1-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41
-// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK1:       .untied.jmp.1.i:
 // CHECK1-NEXT:    store i32 1, i32* @a, align 4
@@ -689,7 +672,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..8
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -735,7 +718,7 @@
 // CHECK1-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51
-// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK1:       .untied.jmp.1.i:
 // CHECK1-NEXT:    store i32 1, i32* @a, align 4
@@ -749,7 +732,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -785,7 +768,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..12
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -821,7 +804,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..14
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -857,7 +840,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..16
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -896,7 +879,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK1-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK1-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -910,7 +893,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..19
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -947,7 +930,7 @@
 // CHECK1-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101
 // CHECK1-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
-// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101
 // CHECK1-NEXT:    store i32 4, i32* [[TMP16]], align 128
 // CHECK1-NEXT:    store i32 4, i32* @a, align 4
@@ -960,12 +943,12 @@
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK1-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]]
 // CHECK1-NEXT:    ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map..20
-// CHECK1-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S*** noalias [[TMP3:%.*]]) #[[ATTR0]] {
+// CHECK1-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S*** noalias [[TMP3:%.*]]) #[[ATTR7]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.20*, align 8
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -989,7 +972,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..21
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1030,7 +1013,7 @@
 // CHECK1-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S***)*
-// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111
@@ -1053,26 +1036,26 @@
 // CHECK1-NEXT:    store i32 1, i32* [[TMP22]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK1-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT:%.*]]
 // CHECK1:       .untied.jmp.2.i:
-// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK1-NEXT:    [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR5]]
+// CHECK1-NEXT:    [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR4]]
 // CHECK1-NEXT:    [[DOTS2__ADDR_I:%.*]] = bitcast i8* [[DOTS2__VOID_ADDR_I]] to %struct.S*
 // CHECK1-NEXT:    store %struct.S* [[DOTS2__ADDR_I]], %struct.S** [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    store i32 2, i32* [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP29:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK1:       .untied.jmp.3.i:
-// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 0, i32* [[A_I]], align 4
 // CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK1-NEXT:    [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP33:%.*]] = bitcast i8* [[TMP32]] to %struct.kmp_task_t_with_privates.18*
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 2
@@ -1080,44 +1063,44 @@
 // CHECK1-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP16]], align 128
 // CHECK1-NEXT:    store i32 [[TMP37]], i32* [[TMP36]], align 128
 // CHECK1-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    store i32 3, i32* [[TMP40]], align 4
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK1:       .untied.jmp.5.i:
 // CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK1-NEXT:    [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    store i32 4, i32* [[TMP46]], align 4
 // CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK1-NEXT:    [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK1:       .untied.jmp.7.i:
-// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP50:%.*]] = bitcast %struct.S* [[TMP17]] to i8*
 // CHECK1-NEXT:    [[TMP51:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP50]], i8* align 4 [[TMP51]], i64 4, i1 false) #[[ATTR5]]
-// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP50]], i8* align 4 [[TMP51]], i64 4, i1 false) #[[ATTR4]]
+// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[A9_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 10, i32* [[A9_I]], align 4
 // CHECK1-NEXT:    [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK1-NEXT:    [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK1-NEXT:    store i32 5, i32* [[TMP54]], align 4
 // CHECK1-NEXT:    [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK1-NEXT:    [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK1:       .untied.jmp.10.i:
-// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK1-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[TMP19]] to i8*
-// CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR5]]
-// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR4]]
+// CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR4]]
 // CHECK1-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111
 // CHECK1-NEXT:    br label [[CLEANUP_I]]
 // CHECK1:       cleanup.i:
@@ -1147,6 +1130,23 @@
 // CHECK1-NEXT:    ret void
 //
 //
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK1-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
+// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
+// CHECK1-NEXT:    ret void
+//
+//
 // CHECK1-LABEL: define {{[^@]+}}@_ZN2S1C2Ev
 // CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
 // CHECK1-NEXT:  entry:
@@ -1179,7 +1179,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..23
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1272,7 +1272,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map..26
-// CHECK1-SAME: (%struct..kmp_privates.t.25* noalias [[TMP0:%.*]], double** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK1-SAME: (%struct..kmp_privates.t.25* noalias [[TMP0:%.*]], double** noalias [[TMP1:%.*]]) #[[ATTR7]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.25*, align 8
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca double**, align 8
@@ -1286,7 +1286,7 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..27
-// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1323,7 +1323,7 @@
 // CHECK1-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131
 // CHECK1-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)*
-// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK1-NEXT:    call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK1-NEXT:    [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !131
 // CHECK1-NEXT:    [[TMP17:%.*]] = load double, double* [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0
@@ -1337,31 +1337,14 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp
-// CHECK1-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK1-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    call void @__cxx_global_var_init()
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
-// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
-// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@main
-// CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[B:%.*]] = alloca i8, align 1
@@ -1652,7 +1635,7 @@
 // CHECK2:       arraydestroy.body:
 // CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP170]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]]
+// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]]
 // CHECK2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN32]]
 // CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE33:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK2:       arraydestroy.done33:
@@ -1661,7 +1644,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
@@ -1671,7 +1654,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1717,7 +1700,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..2
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1758,7 +1741,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1804,13 +1787,13 @@
 // CHECK2-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
 // CHECK2:       .untied.jmp.1.i:
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
-// CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK2-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK2-NEXT:    store i32 1, i32* @a, align 4
-// CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK2-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK2-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31
 // CHECK2-NEXT:    br label [[CLEANUP_I]]
 // CHECK2:       cleanup.i:
@@ -1821,7 +1804,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..6
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1867,7 +1850,7 @@
 // CHECK2-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41
-// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK2:       .untied.jmp.1.i:
 // CHECK2-NEXT:    store i32 1, i32* @a, align 4
@@ -1881,7 +1864,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..8
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1927,7 +1910,7 @@
 // CHECK2-NEXT:    store i32 1, i32* [[TMP13]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51
 // CHECK2-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51
-// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK2:       .untied.jmp.1.i:
 // CHECK2-NEXT:    store i32 1, i32* @a, align 4
@@ -1941,7 +1924,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -1977,7 +1960,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..12
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2013,7 +1996,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..14
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2049,7 +2032,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..16
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2088,7 +2071,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK2-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK2-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -2102,7 +2085,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..19
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2139,7 +2122,7 @@
 // CHECK2-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101
 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
-// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101
 // CHECK2-NEXT:    store i32 4, i32* [[TMP16]], align 128
 // CHECK2-NEXT:    store i32 4, i32* @a, align 4
@@ -2152,12 +2135,12 @@
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK2-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]]
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_privates_map..20
-// CHECK2-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S*** noalias [[TMP3:%.*]]) #[[ATTR0]] {
+// CHECK2-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S*** noalias [[TMP3:%.*]]) #[[ATTR7]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.20*, align 8
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -2181,7 +2164,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..21
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2222,7 +2205,7 @@
 // CHECK2-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S***)*
-// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111
@@ -2245,26 +2228,26 @@
 // CHECK2-NEXT:    store i32 1, i32* [[TMP22]], align 4
 // CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK2-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT:%.*]]
 // CHECK2:       .untied.jmp.2.i:
-// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK2-NEXT:    [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR5]]
+// CHECK2-NEXT:    [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR4]]
 // CHECK2-NEXT:    [[DOTS2__ADDR_I:%.*]] = bitcast i8* [[DOTS2__VOID_ADDR_I]] to %struct.S*
 // CHECK2-NEXT:    store %struct.S* [[DOTS2__ADDR_I]], %struct.S** [[TMP18]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    store i32 2, i32* [[TMP27]], align 4
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP29:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK2-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK2:       .untied.jmp.3.i:
-// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 0, i32* [[A_I]], align 4
 // CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK2-NEXT:    [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP33:%.*]] = bitcast i8* [[TMP32]] to %struct.kmp_task_t_with_privates.18*
 // CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 2
@@ -2272,44 +2255,44 @@
 // CHECK2-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP16]], align 128
 // CHECK2-NEXT:    store i32 [[TMP37]], i32* [[TMP36]], align 128
 // CHECK2-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK2-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    store i32 3, i32* [[TMP40]], align 4
 // CHECK2-NEXT:    [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK2:       .untied.jmp.5.i:
 // CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK2-NEXT:    [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    store i32 4, i32* [[TMP46]], align 4
 // CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK2-NEXT:    [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK2:       .untied.jmp.7.i:
-// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP50:%.*]] = bitcast %struct.S* [[TMP17]] to i8*
 // CHECK2-NEXT:    [[TMP51:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP50]], i8* align 4 [[TMP51]], i64 4, i1 false) #[[ATTR5]]
-// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP50]], i8* align 4 [[TMP51]], i64 4, i1 false) #[[ATTR4]]
+// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[A9_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 10, i32* [[A9_I]], align 4
 // CHECK2-NEXT:    [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
-// CHECK2-NEXT:    [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK2-NEXT:    store i32 5, i32* [[TMP54]], align 4
 // CHECK2-NEXT:    [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK2-NEXT:    [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK2:       .untied.jmp.10.i:
-// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP19]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111
 // CHECK2-NEXT:    [[TMP59:%.*]] = bitcast %struct.S* [[TMP19]] to i8*
-// CHECK2-NEXT:    call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR5]]
-// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR4]]
+// CHECK2-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[TMP17]]) #[[ATTR4]]
 // CHECK2-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111
 // CHECK2-NEXT:    br label [[CLEANUP_I]]
 // CHECK2:       cleanup.i:
@@ -2339,6 +2322,23 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK2-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
+// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@_ZN2S1C2Ev
 // CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
 // CHECK2-NEXT:  entry:
@@ -2371,7 +2371,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..23
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2464,7 +2464,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_privates_map..26
-// CHECK2-SAME: (%struct..kmp_privates.t.25* noalias [[TMP0:%.*]], double** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK2-SAME: (%struct..kmp_privates.t.25* noalias [[TMP0:%.*]], double** noalias [[TMP1:%.*]]) #[[ATTR7]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.25*, align 8
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca double**, align 8
@@ -2478,7 +2478,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..27
-// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK2-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2515,7 +2515,7 @@
 // CHECK2-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131
 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)*
-// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK2-NEXT:    call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK2-NEXT:    [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !131
 // CHECK2-NEXT:    [[TMP17:%.*]] = load double, double* [[TMP16]], align 8
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0
@@ -2529,31 +2529,14 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp
-// CHECK2-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK2-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @__cxx_global_var_init()
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK3-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
-// CHECK3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
-// CHECK3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
-// CHECK3-NEXT:    ret void
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@main
-// CHECK3-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[B:%.*]] = alloca i8, align 1
@@ -2863,7 +2846,7 @@
 // CHECK3:       arraydestroy.body:
 // CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP169]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]]
+// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]]
 // CHECK3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]]
 // CHECK3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK3:       arraydestroy.done52:
@@ -2872,7 +2855,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
@@ -2882,7 +2865,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2928,7 +2911,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..2
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -2969,7 +2952,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3013,15 +2996,15 @@
 // CHECK3:       .untied.jmp..i:
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31
 // CHECK3-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
 // CHECK3:       .untied.jmp.1.i:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK3-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK3-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK3-NEXT:    store i32 1, i32* @a, align 4
-// CHECK3-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK3-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK3-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31
 // CHECK3-NEXT:    br label [[CLEANUP_I]]
 // CHECK3:       cleanup.i:
@@ -3032,7 +3015,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..6
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3076,9 +3059,9 @@
 // CHECK3:       .untied.jmp..i:
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41
 // CHECK3-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41
-// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK3:       .untied.jmp.1.i:
 // CHECK3-NEXT:    store i32 1, i32* @a, align 4
@@ -3092,7 +3075,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..8
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3136,9 +3119,9 @@
 // CHECK3:       .untied.jmp..i:
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51
 // CHECK3-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51
-// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK3:       .untied.jmp.1.i:
 // CHECK3-NEXT:    store i32 1, i32* @a, align 4
@@ -3152,7 +3135,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3188,7 +3171,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..12
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3224,7 +3207,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..14
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3260,7 +3243,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..16
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3299,7 +3282,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK3-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -3313,7 +3296,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..19
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3350,7 +3333,7 @@
 // CHECK3-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101
 // CHECK3-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
-// CHECK3-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101
 // CHECK3-NEXT:    store i32 4, i32* [[TMP16]], align 128
 // CHECK3-NEXT:    store i32 4, i32* @a, align 4
@@ -3363,12 +3346,12 @@
 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK3-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]]
 // CHECK3-NEXT:    ret void
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_privates_map..20
-// CHECK3-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S** noalias [[TMP3:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S** noalias [[TMP3:%.*]]) #[[ATTR7]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.20*, align 8
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -3392,7 +3375,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..21
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3435,7 +3418,7 @@
 // CHECK3-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S**)*
-// CHECK3-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111
@@ -3454,59 +3437,59 @@
 // CHECK3:       .untied.jmp..i:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    store i32 1, i32* [[TMP21]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK3-NEXT:    [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT:%.*]]
 // CHECK3:       .untied.jmp.2.i:
-// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR5]]
-// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR4]]
+// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 0, i32* [[A_I]], align 4, !noalias !111
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR4]]
+// CHECK3-NEXT:    [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8* [[TMP24]] to %struct.kmp_task_t_with_privates.18*
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP25]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP25]], i32 0, i32 2
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP27]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP16]], align 128
 // CHECK3-NEXT:    store i32 [[TMP29]], i32* [[TMP28]], align 128
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR4]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    store i32 2, i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK3-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK3:       .untied.jmp.6.i:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    store i32 3, i32* [[TMP35]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK3-NEXT:    [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK3:       .untied.jmp.10.i:
-// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP38:%.*]] = bitcast %struct.S* [[S1_I]] to i8*
 // CHECK3-NEXT:    [[TMP39:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR5]], !noalias !111
-// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !111
+// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 10, i32* [[A12_I]], align 4, !noalias !111
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK3-NEXT:    store i32 4, i32* [[TMP41]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK3-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK3:       .untied.jmp.15.i:
-// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR5]]
-// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR5]]
+// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR4]]
+// CHECK3-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR4]]
 // CHECK3-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111
 // CHECK3-NEXT:    br label [[CLEANUP_I]]
 // CHECK3:       cleanup.i:
@@ -3536,6 +3519,23 @@
 // CHECK3-NEXT:    ret void
 //
 //
+// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK3-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
+// CHECK3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK3-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
+// CHECK3-NEXT:    ret void
+//
+//
 // CHECK3-LABEL: define {{[^@]+}}@_ZN2S1C2Ev
 // CHECK3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
 // CHECK3-NEXT:  entry:
@@ -3569,7 +3569,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..23
-// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK3-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -3608,31 +3608,14 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp
-// CHECK3-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK3-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    call void @__cxx_global_var_init()
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK4-SAME: () #[[ATTR0:[0-9]+]] section "__TEXT,__StaticInit,regular,pure_instructions" {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
-// CHECK4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
-// CHECK4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
-// CHECK4-NEXT:    ret void
-//
-//
 // CHECK4-LABEL: define {{[^@]+}}@main
-// CHECK4-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[B:%.*]] = alloca i8, align 1
@@ -3942,7 +3925,7 @@
 // CHECK4:       arraydestroy.body:
 // CHECK4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP169]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK4-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]]
+// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]]
 // CHECK4-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]]
 // CHECK4-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK4:       arraydestroy.done52:
@@ -3951,7 +3934,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
@@ -3961,7 +3944,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry.
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4007,7 +3990,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..2
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4048,7 +4031,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..4
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4092,15 +4075,15 @@
 // CHECK4:       .untied.jmp..i:
 // CHECK4-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31
 // CHECK4-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
 // CHECK4:       .untied.jmp.1.i:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK4-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK4-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK4-NEXT:    store i32 1, i32* @a, align 4
-// CHECK4-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR5]]
+// CHECK4-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]]
 // CHECK4-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31
 // CHECK4-NEXT:    br label [[CLEANUP_I]]
 // CHECK4:       cleanup.i:
@@ -4111,7 +4094,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..6
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4155,9 +4138,9 @@
 // CHECK4:       .untied.jmp..i:
 // CHECK4-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41
 // CHECK4-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41
-// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK4:       .untied.jmp.1.i:
 // CHECK4-NEXT:    store i32 1, i32* @a, align 4
@@ -4171,7 +4154,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..8
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4215,9 +4198,9 @@
 // CHECK4:       .untied.jmp..i:
 // CHECK4-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51
 // CHECK4-NEXT:    store i32 1, i32* [[TMP13]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51
-// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK4:       .untied.jmp.1.i:
 // CHECK4-NEXT:    store i32 1, i32* @a, align 4
@@ -4231,7 +4214,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..10
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4267,7 +4250,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..12
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4303,7 +4286,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..14
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4339,7 +4322,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..16
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4378,7 +4361,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_privates_map.
-// CHECK4-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
 // CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -4392,7 +4375,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..19
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4429,7 +4412,7 @@
 // CHECK4-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101
 // CHECK4-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)*
-// CHECK4-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101
 // CHECK4-NEXT:    store i32 4, i32* [[TMP16]], align 128
 // CHECK4-NEXT:    store i32 4, i32* @a, align 4
@@ -4442,12 +4425,12 @@
 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
 // CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
-// CHECK4-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void @_ZN1SD2Ev(%struct.S* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]]
 // CHECK4-NEXT:    ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_privates_map..20
-// CHECK4-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S** noalias [[TMP3:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (%struct..kmp_privates.t.20* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]], %struct.S** noalias [[TMP2:%.*]], %struct.S** noalias [[TMP3:%.*]]) #[[ATTR7]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.20*, align 8
 // CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i32**, align 8
@@ -4471,7 +4454,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..21
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4514,7 +4497,7 @@
 // CHECK4-NEXT:    [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S**)*
-// CHECK4-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111
@@ -4533,59 +4516,59 @@
 // CHECK4:       .untied.jmp..i:
 // CHECK4-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    store i32 1, i32* [[TMP21]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK4-NEXT:    [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT:%.*]]
 // CHECK4:       .untied.jmp.2.i:
-// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR5]]
-// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR4]]
+// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0
 // CHECK4-NEXT:    store i32 0, i32* [[A_I]], align 4, !noalias !111
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR5]]
-// CHECK4-NEXT:    [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR4]]
+// CHECK4-NEXT:    [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8* [[TMP24]] to %struct.kmp_task_t_with_privates.18*
 // CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP25]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP25]], i32 0, i32 2
 // CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP27]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP16]], align 128
 // CHECK4-NEXT:    store i32 [[TMP29]], i32* [[TMP28]], align 128
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR5]]
-// CHECK4-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR4]]
+// CHECK4-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    store i32 2, i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK4-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK4:       .untied.jmp.6.i:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK4-NEXT:    [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK4-NEXT:    [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    store i32 3, i32* [[TMP35]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK4-NEXT:    [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK4:       .untied.jmp.10.i:
-// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP38:%.*]] = bitcast %struct.S* [[S1_I]] to i8*
 // CHECK4-NEXT:    [[TMP39:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8*
-// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR5]], !noalias !111
-// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !111
+// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0
 // CHECK4-NEXT:    store i32 10, i32* [[A12_I]], align 4, !noalias !111
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR5]]
-// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111
 // CHECK4-NEXT:    store i32 4, i32* [[TMP41]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]]
 // CHECK4-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR5]]
+// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[DOTOMP_OUTLINED__17_EXIT]]
 // CHECK4:       .untied.jmp.15.i:
-// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR5]]
-// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR5]]
+// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S2_I]]) #[[ATTR4]]
+// CHECK4-NEXT:    call void @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(4) [[S1_I]]) #[[ATTR4]]
 // CHECK4-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111
 // CHECK4-NEXT:    br label [[CLEANUP_I]]
 // CHECK4:       cleanup.i:
@@ -4615,6 +4598,23 @@
 // CHECK4-NEXT:    ret void
 //
 //
+// CHECK4-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK4-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    call void @_ZN2S1C1Ev(%struct.S1* nonnull dereferenceable(4) @s1)
+// CHECK4-NEXT:    ret void
+//
+//
+// CHECK4-LABEL: define {{[^@]+}}@_ZN2S1C1Ev
+// CHECK4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK4-NEXT:    call void @_ZN2S1C2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]])
+// CHECK4-NEXT:    ret void
+//
+//
 // CHECK4-LABEL: define {{[^@]+}}@_ZN2S1C2Ev
 // CHECK4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
 // CHECK4-NEXT:  entry:
@@ -4648,7 +4648,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry..23
-// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK4-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.22* noalias [[TMP1:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
@@ -4687,7 +4687,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_task_codegen.cpp
-// CHECK4-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" {
+// CHECK4-SAME: () #[[ATTR7]] section "__TEXT,__StaticInit,regular,pure_instructions" {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    call void @__cxx_global_var_init()
 // CHECK4-NEXT:    ret void
diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp
index 31ecb80..27aea98 100644
--- a/clang/test/OpenMP/taskgroup_codegen.cpp
+++ b/clang/test/OpenMP/taskgroup_codegen.cpp
@@ -1,62 +1,294 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=DEBUG1
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
-// CHECK:       [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
-
-// CHECK:       define {{.*}}void [[FOO:@.+]]()
-
 void foo() { extern void mayThrow(); mayThrow(); }
 
-// CHECK-LABEL: @main
-// TERM_DEBUG-LABEL: @main
 int main() {
-// CHECK:       [[A_ADDR:%.+]] = alloca i8
   char a;
-
-// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK:       call {{.*}}void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call {{.*}}void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 #pragma omp taskgroup
   a = 2;
-// CHECK:       call {{.*}}void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  invoke {{.*}}void [[FOO]]()
-// CHECK:       call {{.*}}void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 #pragma omp taskgroup
   foo();
-// CHECK-NOT:   call {{.*}}void @__kmpc_taskgroup
-// CHECK-NOT:   call {{.*}}void @__kmpc_end_taskgroup
-// CHECK:       ret
   return a;
 }
 
-// CHECK-LABEL:      parallel_taskgroup
-// TERM_DEBUG-LABEL: parallel_taskgroup
 void parallel_taskgroup() {
 #pragma omp parallel
 #pragma omp taskgroup
-  // TERM_DEBUG-NOT: __kmpc_global_thread_num
-  // TERM_DEBUG:     call void @__kmpc_taskgroup({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
-  // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
-  // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
-  // TERM_DEBUG-NOT: __kmpc_global_thread_num
-  // TERM_DEBUG:     call void @__kmpc_end_taskgroup({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
-  // TERM_DEBUG:     [[TERM_LPAD]]
-  // TERM_DEBUG:     call void @__clang_call_terminate
-  // TERM_DEBUG:     unreachable
   foo();
 }
-// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-12]],
-// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !DILocation(line: [[@LINE-3]],
 #endif
+// CHECK1-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_Z8mayThrowv()
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@main
+// CHECK1-SAME: () #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-NEXT:    store i8 2, i8* [[A]], align 1
+// CHECK1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-NEXT:    invoke void @_Z3foov()
+// CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A]], align 1
+// CHECK1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+// CHECK1-NEXT:    ret i32 [[CONV]]
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    catch i8* null
+// CHECK1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
+// CHECK1-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-NEXT:    br label [[TERMINATE_HANDLER:%.*]]
+// CHECK1:       terminate.handler:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8:[0-9]+]]
+// CHECK1-NEXT:    unreachable
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__clang_call_terminate
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK1-NEXT:    [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT:    call void @_ZSt9terminatev() #[[ATTR8]]
+// CHECK1-NEXT:    unreachable
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv
+// CHECK1-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK1-NEXT:    invoke void @_Z3foov()
+// CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK1-NEXT:    ret void
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    catch i8* null
+// CHECK1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
+// CHECK1-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK1-NEXT:    br label [[TERMINATE_HANDLER:%.*]]
+// CHECK1:       terminate.handler:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8]]
+// CHECK1-NEXT:    unreachable
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_Z8mayThrowv()
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@main
+// CHECK2-SAME: () #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK2-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-NEXT:    store i8 2, i8* [[A]], align 1
+// CHECK2-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-NEXT:    invoke void @_Z3foov()
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK2:       invoke.cont:
+// CHECK2-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A]], align 1
+// CHECK2-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+// CHECK2-NEXT:    ret i32 [[CONV]]
+// CHECK2:       lpad:
+// CHECK2-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    catch i8* null
+// CHECK2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
+// CHECK2-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-NEXT:    br label [[TERMINATE_HANDLER:%.*]]
+// CHECK2:       terminate.handler:
+// CHECK2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8:[0-9]+]]
+// CHECK2-NEXT:    unreachable
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK2-NEXT:    [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT:    call void @_ZSt9terminatev() #[[ATTR8]]
+// CHECK2-NEXT:    unreachable
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv
+// CHECK2-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK2-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK2-NEXT:    invoke void @_Z3foov()
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK2:       invoke.cont:
+// CHECK2-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK2-NEXT:    ret void
+// CHECK2:       lpad:
+// CHECK2-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    catch i8* null
+// CHECK2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
+// CHECK2-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// CHECK2-NEXT:    br label [[TERMINATE_HANDLER:%.*]]
+// CHECK2:       terminate.handler:
+// CHECK2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8]]
+// CHECK2-NEXT:    unreachable
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_Z3foov
+// DEBUG1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    call void @_Z8mayThrowv(), !dbg [[DBG9:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG10:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@main
+// DEBUG1-SAME: () #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG11:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[A:%.*]] = alloca i8, align 1
+// DEBUG1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// DEBUG1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// DEBUG1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !dbg [[DBG12:![0-9]+]]
+// DEBUG1-NEXT:    store i8 2, i8* [[A]], align 1, !dbg [[DBG13:![0-9]+]]
+// DEBUG1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !dbg [[DBG14:![0-9]+]]
+// DEBUG1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]), !dbg [[DBG15:![0-9]+]]
+// DEBUG1-NEXT:    invoke void @_Z3foov()
+// DEBUG1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG16:![0-9]+]]
+// DEBUG1:       invoke.cont:
+// DEBUG1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]), !dbg [[DBG16]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A]], align 1, !dbg [[DBG17:![0-9]+]]
+// DEBUG1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32, !dbg [[DBG17]]
+// DEBUG1-NEXT:    ret i32 [[CONV]], !dbg [[DBG18:![0-9]+]]
+// DEBUG1:       lpad:
+// DEBUG1-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    catch i8* null, !dbg [[DBG19:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0, !dbg [[DBG19]]
+// DEBUG1-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG19]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1, !dbg [[DBG19]]
+// DEBUG1-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG19]]
+// DEBUG1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]), !dbg [[DBG16]]
+// DEBUG1-NEXT:    br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG16]]
+// DEBUG1:       terminate.handler:
+// DEBUG1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG16]]
+// DEBUG1-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8:[0-9]+]], !dbg [[DBG16]]
+// DEBUG1-NEXT:    unreachable, !dbg [[DBG16]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__clang_call_terminate
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] {
+// DEBUG1-NEXT:    [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3:[0-9]+]]
+// DEBUG1-NEXT:    call void @_ZSt9terminatev() #[[ATTR8]]
+// DEBUG1-NEXT:    unreachable
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv
+// DEBUG1-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG20:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg [[DBG21:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG22:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.omp_outlined.
+// DEBUG1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// DEBUG1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// DEBUG1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// DEBUG1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG24:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG24]]
+// DEBUG1-NEXT:    call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG24]]
+// DEBUG1-NEXT:    invoke void @_Z3foov()
+// DEBUG1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG25:![0-9]+]]
+// DEBUG1:       invoke.cont:
+// DEBUG1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG26:![0-9]+]]
+// DEBUG1:       lpad:
+// DEBUG1-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    catch i8* null, !dbg [[DBG27:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0, !dbg [[DBG27]]
+// DEBUG1-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG27]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1, !dbg [[DBG27]]
+// DEBUG1-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG27]]
+// DEBUG1-NEXT:    call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]]
+// DEBUG1-NEXT:    br label [[TERMINATE_HANDLER:%.*]], !dbg [[DBG25]]
+// DEBUG1:       terminate.handler:
+// DEBUG1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]]
+// DEBUG1-NEXT:    call void @__clang_call_terminate(i8* [[EXN]]) #[[ATTR8]], !dbg [[DBG25]]
+// DEBUG1-NEXT:    unreachable, !dbg [[DBG25]]
+//
diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp
index a7da34e..42c5c24 100644
--- a/clang/test/OpenMP/threadprivate_codegen.cpp
+++ b/clang/test/OpenMP/threadprivate_codegen.cpp
@@ -1,54 +1,29 @@
-// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,OMP50
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,OMP45
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK2
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD1 %s
 // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
-
-// RUN: %clang_cc1 -verify -fopenmp -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK-TLS,OMP50-TLS
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK-TLS,OMP45-TLS
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD2 %s
+// RUN: %clang_cc1 -verify -fopenmp -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK-TLS1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK-TLS2
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-TLS,OMP50-TLS %s
+// RUN: %clang_cc1 -fopenmp -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-TLS3 %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-TLS,OMP45-TLS %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-TLS4 %s
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD3 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD4 %s
 
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG,OMP50-DEBUG %s
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=DEBUG1 %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG,OMP45-DEBUG %s
-
-// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=DEBUG2 %s
 
 // expected-no-diagnostics
-//
 #ifndef HEADER
 #define HEADER
-// CHECK-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] }
-// CHECK-DAG: [[S2:%.+]] = type { [[INT]], double }
-// CHECK-DAG: [[S3:%.+]] = type { [[INT]], float }
-// CHECK-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] }
-// CHECK-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] }
-// CHECK-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double }
-// CHECK-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] }
-// CHECK-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double }
-// CHECK-DEBUG-DAG: [[S3:%.+]] = type { [[INT]], float }
-// CHECK-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] }
-// CHECK-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] }
-// CHECK-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double }
-// CHECK-TLS-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] }
-// CHECK-TLS-DAG: [[S2:%.+]] = type { [[INT]], double }
-// CHECK-TLS-DAG: [[S3:%.+]] = type { [[INT]], float }
-// CHECK-TLS-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] }
-// CHECK-TLS-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] }
-// CHECK-TLS-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double }
-
 struct S1 {
   int a;
   S1()
@@ -598,7 +573,6 @@
   // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
   // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
   // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
-  //
   // CHECK-TLS:       [[ST_INT_ST_VAL:%.*]] = load i32, i32* [[ST_INT_ST_ADDR:[^,]+]]
   // CHECK-TLS-NEXT:  [[RES:%.*]] = load i32, i32* [[RES_ADDR]]
   // CHECK-TLS-NEXT:  [[ADD:%.*]] = add {{.*}} i32 [[RES]], [[ST_INT_ST_VAL]]
@@ -620,7 +594,6 @@
   // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
   // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
   // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
-  //
   // CHECK-TLS:      [[ST_FLOAT_ST_VAL:%.*]]  = load float, float* [[ST_FLOAT_ST_ADDR:[^,]+]]
   // CHECK-TLS-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]]  to i32
   // CHECK-TLS-NEXT: [[RES:%.*]] = load i32, i32* [[RES_ADDR]]
@@ -727,14 +700,6 @@
 // CHECK-TLS:   call void [[ARR_X_TLS_INIT]]
 // CHECK-TLS:   ret [2 x [3 x [[S1]]]]* [[ARR_X]]
 // CHECK-TLS: }
-//
-//
-//
-//
-//
-//
-//
-//
 // CHECK-TLS: define {{.*}} [[S4]]* [[ST_S4_ST_TLS_INITD]] {{#[0-9]+}} comdat {
 // CHECK-TLS:   call void [[ST_S4_ST_TLS_INIT]]
 // CHECK-TLS:   ret [[S4]]* [[ST_S4_ST]]
@@ -874,7 +839,6 @@
   // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
   // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
   // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
-  //
   // OMP45-TLS:      [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST_ADDR:[^,]+]]
   // OMP45-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
   // OMP45-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]]
@@ -896,7 +860,6 @@
   // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
   // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]]
   // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]]
-  //
   // OMP45-TLS:      [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST_ADDR:[^,]+]]
   // OMP45-TLS-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]]
   // OMP45-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]]
@@ -1055,3 +1018,6990 @@
 
 // CHECK-TLS-DAG:      declare {{.*}} void [[GS3_TLS_INIT]]
 // CHECK-TLS-DAG:      declare {{.*}} void [[STATIC_S_TLS_INIT]]
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[TMP2]], i32 5)
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP3]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT2:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT9:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*
+// CHECK1-NEXT:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0
+// CHECK1-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN1]], i32 1)
+// CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2)
+// CHECK1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont3:
+// CHECK1-NEXT:    [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT4]], i32 3)
+// CHECK1-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont5:
+// CHECK1-NEXT:    [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1
+// CHECK1-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN8]], i32 4)
+// CHECK1-NEXT:    to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]]
+// CHECK1:       invoke.cont11:
+// CHECK1-NEXT:    [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT12]], i32 5)
+// CHECK1-NEXT:    to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]]
+// CHECK1:       invoke.cont13:
+// CHECK1-NEXT:    [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1
+// CHECK1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT14]], i32 6)
+// CHECK1-NEXT:    to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]]
+// CHECK1:       invoke.cont15:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP3]]
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
+// CHECK1-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
+// CHECK1-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done6:
+// CHECK1-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK1:       lpad10:
+// CHECK1-NEXT:    [[TMP8:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0
+// CHECK1-NEXT:    store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1
+// CHECK1-NEXT:    store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK1:       arraydestroy.body17:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK1:       arraydestroy.done21:
+// CHECK1-NEXT:    br label [[EHCLEANUP]]
+// CHECK1:       ehcleanup:
+// CHECK1-NEXT:    [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0
+// CHECK1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]]
+// CHECK1:       arraydestroy.body23:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]]
+// CHECK1:       arraydestroy.done27:
+// CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK1:       eh.resume:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK1-NEXT:    [[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL28]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6
+// CHECK1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done1:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.5
+// CHECK1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK1-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont2:
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont3:
+// CHECK1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK1-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// CHECK1:       invoke.cont7:
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK1-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// CHECK1:       invoke.cont8:
+// CHECK1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK1-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// CHECK1:       invoke.cont9:
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// CHECK1-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// CHECK1-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done4:
+// CHECK1-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK1:       lpad6:
+// CHECK1-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// CHECK1-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// CHECK1-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// CHECK1:       arraydestroy.body11:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// CHECK1:       arraydestroy.done15:
+// CHECK1-NEXT:    br label [[EHCLEANUP]]
+// CHECK1:       ehcleanup:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK1:       arraydestroy.body17:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK1:       arraydestroy.done21:
+// CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK1:       eh.resume:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK1-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK1:       arraydestroy.body:
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// CHECK1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1:       arraydestroy.done1:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@main
+// CHECK1-SAME: () #[[ATTR4:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8
+// CHECK1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0
+// CHECK1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// CHECK1:       init.check:
+// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]]
+// CHECK1:       init:
+// CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* (i8*)* @.__kmpc_global_ctor_..6, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..7)
+// CHECK1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP6]])
+// CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK1:       invoke.cont:
+// CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK1-NEXT:    br label [[INIT_END]]
+// CHECK1:       init.end:
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.)
+// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.S3*
+// CHECK1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP9]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i64 24, i8*** @_ZZ4mainE2sm.cache.)
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.Smain*
+// CHECK1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP12]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[A2]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.S1*
+// CHECK1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP16]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
+// CHECK1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// CHECK1-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.)
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S5*
+// CHECK1-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP22]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP23]]
+// CHECK1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP25:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.)
+// CHECK1-NEXT:    [[TMP26:%.*]] = bitcast i8* [[TMP25]] to [2 x [3 x %struct.S1]]*
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP26]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK1-NEXT:    [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[A9]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP27]]
+// CHECK1-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.)
+// CHECK1-NEXT:    [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i32*
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+// CHECK1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.)
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to float*
+// CHECK1-NEXT:    [[TMP35:%.*]] = load float, float* [[TMP34]], align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP35]] to i32
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[CONV]]
+// CHECK1-NEXT:    store i32 [[ADD12]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP37:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.)
+// CHECK1-NEXT:    [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.S4*
+// CHECK1-NEXT:    [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP38]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, i32* [[A13]], align 4
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP40]], [[TMP39]]
+// CHECK1-NEXT:    store i32 [[ADD14]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    ret i32 [[TMP41]]
+// CHECK1:       lpad:
+// CHECK1-NEXT:    [[TMP42:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    cleanup
+// CHECK1-NEXT:    [[TMP43:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 0
+// CHECK1-NEXT:    store i8* [[TMP43]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP44:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 1
+// CHECK1-NEXT:    store i32 [[TMP44]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK1:       eh.resume:
+// CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK1-NEXT:    [[LPAD_VAL15:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL15]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..6
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.Smain*
+// CHECK1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) [[TMP3]], i32 [[TMP6]])
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP7]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..7
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.Smain*
+// CHECK1-NEXT:    call void @_ZZ4mainEN5SmainD1Ev(%struct.Smain* nonnull dereferenceable(24) [[TMP2]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    [[TMP1:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.)
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S3*
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP2]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    store i32 [[TMP3]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP8]]
+// CHECK1-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.)
+// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S5*
+// CHECK1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP11]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.)
+// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to [2 x [3 x %struct.S1]]*
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP15]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK1-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// CHECK1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.)
+// CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i32*
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// CHECK1-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.)
+// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8* [[TMP22]] to float*
+// CHECK1-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP23]], align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP24]] to i32
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[CONV]]
+// CHECK1-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP26:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.)
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8* [[TMP26]] to %struct.S4*
+// CHECK1-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP27]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[A10]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+// CHECK1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK1-NEXT:    ret i32 [[TMP30]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
+// CHECK1-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// CHECK1:       init.check:
+// CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* (i8*)* @.__kmpc_global_ctor_..9, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..10)
+// CHECK1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK1-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK1-NEXT:    br label [[INIT_END]]
+// CHECK1:       init.end:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..9
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*
+// CHECK1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) [[TMP2]], i32 23)
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    ret i8* [[TMP3]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..10
+// CHECK1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*
+// CHECK1-NEXT:    call void @_ZN2S4D1Ev(%struct.S4* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK1-SAME: () #[[ATTR0]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @__cxx_global_var_init()
+// CHECK1-NEXT:    call void @.__omp_threadprivate_init_.()
+// CHECK1-NEXT:    call void @__cxx_global_var_init.4()
+// CHECK1-NEXT:    call void @__cxx_global_var_init.5()
+// CHECK1-NEXT:    call void @.__omp_threadprivate_init_..3()
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[TMP2]], i32 5)
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    ret i8* [[TMP3]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
+// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// CHECK2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK2:       invoke.cont:
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK2-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// CHECK2:       invoke.cont2:
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK2:       invoke.cont3:
+// CHECK2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK2-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// CHECK2:       invoke.cont7:
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK2-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// CHECK2:       invoke.cont8:
+// CHECK2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK2-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// CHECK2:       invoke.cont9:
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+// CHECK2:       lpad:
+// CHECK2-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    cleanup
+// CHECK2-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// CHECK2-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK2:       arraydestroy.body:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// CHECK2:       arraydestroy.done4:
+// CHECK2-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK2:       lpad6:
+// CHECK2-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    cleanup
+// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// CHECK2-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// CHECK2-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// CHECK2:       arraydestroy.body11:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// CHECK2:       arraydestroy.done15:
+// CHECK2-NEXT:    br label [[EHCLEANUP]]
+// CHECK2:       ehcleanup:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK2:       arraydestroy.body17:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK2:       arraydestroy.done21:
+// CHECK2-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK2:       eh.resume:
+// CHECK2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK2-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK2-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK2:       arraydestroy.body:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK2:       arraydestroy.done1:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..3
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT2:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[ARRAYINIT_ENDOFINIT9:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*
+// CHECK2-NEXT:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0
+// CHECK2-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN1]], i32 1)
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK2:       invoke.cont:
+// CHECK2-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2)
+// CHECK2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK2:       invoke.cont3:
+// CHECK2-NEXT:    [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT4]], i32 3)
+// CHECK2-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
+// CHECK2:       invoke.cont5:
+// CHECK2-NEXT:    [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1
+// CHECK2-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN8]], i32 4)
+// CHECK2-NEXT:    to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]]
+// CHECK2:       invoke.cont11:
+// CHECK2-NEXT:    [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT12]], i32 5)
+// CHECK2-NEXT:    to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]]
+// CHECK2:       invoke.cont13:
+// CHECK2-NEXT:    [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1
+// CHECK2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT14]], i32 6)
+// CHECK2-NEXT:    to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]]
+// CHECK2:       invoke.cont15:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    ret i8* [[TMP3]]
+// CHECK2:       lpad:
+// CHECK2-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    cleanup
+// CHECK2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0
+// CHECK2-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1
+// CHECK2-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK2:       arraydestroy.body:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]]
+// CHECK2:       arraydestroy.done6:
+// CHECK2-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK2:       lpad10:
+// CHECK2-NEXT:    [[TMP8:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    cleanup
+// CHECK2-NEXT:    [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0
+// CHECK2-NEXT:    store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1
+// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK2:       arraydestroy.body17:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK2:       arraydestroy.done21:
+// CHECK2-NEXT:    br label [[EHCLEANUP]]
+// CHECK2:       ehcleanup:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK2-NEXT:    [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0
+// CHECK2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0
+// CHECK2-NEXT:    [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]]
+// CHECK2:       arraydestroy.body23:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]]
+// CHECK2:       arraydestroy.done27:
+// CHECK2-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK2:       eh.resume:
+// CHECK2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK2-NEXT:    [[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK2-NEXT:    resume { i8*, i32 } [[LPAD_VAL28]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..4
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*
+// CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6
+// CHECK2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK2:       arraydestroy.body:
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK2:       arraydestroy.done1:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..5
+// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..3, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..4)
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@main
+// CHECK2-SAME: () #[[ATTR4:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8
+// CHECK2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0
+// CHECK2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// CHECK2:       init.check:
+// CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK2-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]]
+// CHECK2:       init:
+// CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* (i8*)* @.__kmpc_global_ctor_..6, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..7)
+// CHECK2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP6]])
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK2:       invoke.cont:
+// CHECK2-NEXT:    [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK2-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK2-NEXT:    br label [[INIT_END]]
+// CHECK2:       init.end:
+// CHECK2-NEXT:    [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.)
+// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.S3*
+// CHECK2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP9]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i64 24, i8*** @_ZZ4mainE2sm.cache.)
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.Smain*
+// CHECK2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP12]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[A2]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK2-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.S1*
+// CHECK2-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP16]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
+// CHECK2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// CHECK2-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.)
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S5*
+// CHECK2-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP22]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP23]]
+// CHECK2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.)
+// CHECK2-NEXT:    [[TMP26:%.*]] = bitcast i8* [[TMP25]] to [2 x [3 x %struct.S1]]*
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP26]], i64 0, i64 1
+// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK2-NEXT:    [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[A9]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP27]]
+// CHECK2-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.)
+// CHECK2-NEXT:    [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i32*
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+// CHECK2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.)
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to float*
+// CHECK2-NEXT:    [[TMP35:%.*]] = load float, float* [[TMP34]], align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP35]] to i32
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[CONV]]
+// CHECK2-NEXT:    store i32 [[ADD12]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.)
+// CHECK2-NEXT:    [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.S4*
+// CHECK2-NEXT:    [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP38]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, i32* [[A13]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP40]], [[TMP39]]
+// CHECK2-NEXT:    store i32 [[ADD14]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    ret i32 [[TMP41]]
+// CHECK2:       lpad:
+// CHECK2-NEXT:    [[TMP42:%.*]] = landingpad { i8*, i32 }
+// CHECK2-NEXT:    cleanup
+// CHECK2-NEXT:    [[TMP43:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 0
+// CHECK2-NEXT:    store i8* [[TMP43]], i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[TMP44:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 1
+// CHECK2-NEXT:    store i32 [[TMP44]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// CHECK2-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK2:       eh.resume:
+// CHECK2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK2-NEXT:    [[LPAD_VAL15:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK2-NEXT:    resume { i8*, i32 } [[LPAD_VAL15]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..6
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.Smain*
+// CHECK2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) [[TMP3]], i32 [[TMP6]])
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    ret i8* [[TMP7]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..7
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.Smain*
+// CHECK2-NEXT:    call void @_ZZ4mainEN5SmainD1Ev(%struct.Smain* nonnull dereferenceable(24) [[TMP2]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    [[TMP1:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.)
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S3*
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP2]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.)
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*
+// CHECK2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP8]]
+// CHECK2-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.)
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S5*
+// CHECK2-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP11]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.)
+// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to [2 x [3 x %struct.S1]]*
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP15]], i64 0, i64 1
+// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK2-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// CHECK2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.)
+// CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i32*
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// CHECK2-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.)
+// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8* [[TMP22]] to float*
+// CHECK2-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP23]], align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP24]] to i32
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[CONV]]
+// CHECK2-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.)
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8* [[TMP26]] to %struct.S4*
+// CHECK2-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP27]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[A10]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+// CHECK2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK2-NEXT:    ret i32 [[TMP30]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
+// CHECK2-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// CHECK2:       init.check:
+// CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* (i8*)* @.__kmpc_global_ctor_..9, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..10)
+// CHECK2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK2-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK2-NEXT:    br label [[INIT_END]]
+// CHECK2:       init.end:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..9
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*
+// CHECK2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) [[TMP2]], i32 23)
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    ret i8* [[TMP3]]
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..10
+// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*
+// CHECK2-NEXT:    call void @_ZN2S4D1Ev(%struct.S4* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @__cxx_global_var_init()
+// CHECK2-NEXT:    call void @.__omp_threadprivate_init_.()
+// CHECK2-NEXT:    call void @__cxx_global_var_init.1()
+// CHECK2-NEXT:    call void @__cxx_global_var_init.2()
+// CHECK2-NEXT:    call void @.__omp_threadprivate_init_..5()
+// CHECK2-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// SIMD1-SAME: () #[[ATTR0:[0-9]+]] {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// SIMD1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// SIMD1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// SIMD1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// SIMD1-SAME: () #[[ATTR0]] {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// SIMD1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// SIMD1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// SIMD1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// SIMD1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// SIMD1-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// SIMD1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// SIMD1:       invoke.cont:
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// SIMD1-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// SIMD1:       invoke.cont2:
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// SIMD1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// SIMD1:       invoke.cont3:
+// SIMD1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// SIMD1-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// SIMD1:       invoke.cont7:
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// SIMD1-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// SIMD1:       invoke.cont8:
+// SIMD1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// SIMD1-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// SIMD1:       invoke.cont9:
+// SIMD1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+// SIMD1:       lpad:
+// SIMD1-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// SIMD1-NEXT:    cleanup
+// SIMD1-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// SIMD1-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// SIMD1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// SIMD1-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD1-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// SIMD1:       arraydestroy.body:
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// SIMD1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// SIMD1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// SIMD1:       arraydestroy.done4:
+// SIMD1-NEXT:    br label [[EHCLEANUP:%.*]]
+// SIMD1:       lpad6:
+// SIMD1-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// SIMD1-NEXT:    cleanup
+// SIMD1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// SIMD1-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// SIMD1-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// SIMD1-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD1-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD1-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// SIMD1:       arraydestroy.body11:
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// SIMD1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]]
+// SIMD1-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// SIMD1:       arraydestroy.done15:
+// SIMD1-NEXT:    br label [[EHCLEANUP]]
+// SIMD1:       ehcleanup:
+// SIMD1-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// SIMD1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// SIMD1:       arraydestroy.body17:
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// SIMD1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// SIMD1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// SIMD1:       arraydestroy.done21:
+// SIMD1-NEXT:    br label [[EH_RESUME:%.*]]
+// SIMD1:       eh.resume:
+// SIMD1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// SIMD1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// SIMD1-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// SIMD1-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// SIMD1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// SIMD1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// SIMD1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// SIMD1:       arraydestroy.body:
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// SIMD1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// SIMD1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// SIMD1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// SIMD1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// SIMD1:       arraydestroy.done1:
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@main
+// SIMD1-SAME: () #[[ATTR4:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// SIMD1-NEXT:    [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8
+// SIMD1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// SIMD1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// SIMD1:       init.check:
+// SIMD1-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// SIMD1-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]]
+// SIMD1:       init:
+// SIMD1-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD1-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// SIMD1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// SIMD1:       invoke.cont:
+// SIMD1-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD1-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD1-NEXT:    br label [[INIT_END]]
+// SIMD1:       init.end:
+// SIMD1-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4
+// SIMD1-NEXT:    store i32 [[TMP4]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8
+// SIMD1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// SIMD1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+// SIMD1-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// SIMD1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// SIMD1-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// SIMD1-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4
+// SIMD1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// SIMD1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// SIMD1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+// SIMD1-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// SIMD1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32
+// SIMD1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]]
+// SIMD1-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// SIMD1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    ret i32 [[TMP21]]
+// SIMD1:       lpad:
+// SIMD1-NEXT:    [[TMP22:%.*]] = landingpad { i8*, i32 }
+// SIMD1-NEXT:    cleanup
+// SIMD1-NEXT:    [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0
+// SIMD1-NEXT:    store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8
+// SIMD1-NEXT:    [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1
+// SIMD1-NEXT:    store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD1-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD1-NEXT:    br label [[EH_RESUME:%.*]]
+// SIMD1:       eh.resume:
+// SIMD1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// SIMD1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// SIMD1-NEXT:    [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// SIMD1-NEXT:    resume { i8*, i32 } [[LPAD_VAL8]]
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// SIMD1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// SIMD1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_Z6foobarv
+// SIMD1-SAME: () #[[ATTR5:[0-9]+]] {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4
+// SIMD1-NEXT:    store i32 [[TMP0]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+// SIMD1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// SIMD1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// SIMD1-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// SIMD1-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4
+// SIMD1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+// SIMD1-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// SIMD1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// SIMD1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// SIMD1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP11]] to i32
+// SIMD1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]]
+// SIMD1-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4
+// SIMD1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// SIMD1-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// SIMD1-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD1-NEXT:    ret i32 [[TMP15]]
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// SIMD1-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// SIMD1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// SIMD1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// SIMD1:       init.check:
+// SIMD1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// SIMD1-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD1-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// SIMD1-NEXT:    br label [[INIT_END]]
+// SIMD1:       init.end:
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// SIMD1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// SIMD1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// SIMD1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// SIMD1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    store i32 0, i32* [[A]], align 4
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// SIMD1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// SIMD1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    store i32 0, i32* [[A]], align 8
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// SIMD1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// SIMD1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    store i32 0, i32* [[A]], align 8
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// SIMD1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// SIMD1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// SIMD1-NEXT:    store i32 0, i32* [[A]], align 4
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// SIMD1-SAME: () #[[ATTR0]] {
+// SIMD1-NEXT:  entry:
+// SIMD1-NEXT:    call void @__cxx_global_var_init()
+// SIMD1-NEXT:    call void @__cxx_global_var_init.1()
+// SIMD1-NEXT:    call void @__cxx_global_var_init.2()
+// SIMD1-NEXT:    ret void
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// SIMD2-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG118:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG120:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG121:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// SIMD2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
+// SIMD2-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG129:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// SIMD2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG134:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG135:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG136:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG138:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG139:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// SIMD2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG140:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]]
+// SIMD2-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG146]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG147:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// SIMD2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG148:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG152:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// SIMD2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG153:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// SIMD2-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154:![0-9]+]]
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156:![0-9]+]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// SIMD2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG157:![0-9]+]]
+// SIMD2:       invoke.cont:
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// SIMD2-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG158:![0-9]+]]
+// SIMD2:       invoke.cont2:
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// SIMD2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]]
+// SIMD2:       invoke.cont3:
+// SIMD2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]]
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160:![0-9]+]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// SIMD2-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG161:![0-9]+]]
+// SIMD2:       invoke.cont7:
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// SIMD2-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG162:![0-9]+]]
+// SIMD2:       invoke.cont8:
+// SIMD2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// SIMD2-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]]
+// SIMD2:       invoke.cont9:
+// SIMD2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG164:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG164]]
+// SIMD2:       lpad:
+// SIMD2-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// SIMD2-NEXT:    cleanup, !dbg [[DBG165:![0-9]+]]
+// SIMD2-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG165]]
+// SIMD2-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]]
+// SIMD2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG165]]
+// SIMD2-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]]
+// SIMD2-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG156]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]]
+// SIMD2:       arraydestroy.body:
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]]
+// SIMD2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG156]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG156]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]]
+// SIMD2:       arraydestroy.done4:
+// SIMD2-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG156]]
+// SIMD2:       lpad6:
+// SIMD2-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// SIMD2-NEXT:    cleanup, !dbg [[DBG165]]
+// SIMD2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG165]]
+// SIMD2-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]]
+// SIMD2-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG165]]
+// SIMD2-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]]
+// SIMD2-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG160]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG160]]
+// SIMD2:       arraydestroy.body11:
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG160]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG160]]
+// SIMD2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG160]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG160]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG160]]
+// SIMD2:       arraydestroy.done15:
+// SIMD2-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG160]]
+// SIMD2:       ehcleanup:
+// SIMD2-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]]
+// SIMD2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG154]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG154]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG154]]
+// SIMD2:       arraydestroy.body17:
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG154]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG154]]
+// SIMD2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG154]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG154]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG154]]
+// SIMD2:       arraydestroy.done21:
+// SIMD2-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG154]]
+// SIMD2:       eh.resume:
+// SIMD2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG154]]
+// SIMD2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG154]]
+// SIMD2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG154]]
+// SIMD2-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG154]]
+// SIMD2-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG154]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// SIMD2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG166:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// SIMD2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]]
+// SIMD2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]]
+// SIMD2:       arraydestroy.body:
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]]
+// SIMD2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]]
+// SIMD2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG171]]
+// SIMD2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]]
+// SIMD2:       arraydestroy.done1:
+// SIMD2-NEXT:    ret void, !dbg [[DBG171]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@main
+// SIMD2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG174:![0-9]+]]
+// SIMD2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]]
+// SIMD2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]]
+// SIMD2:       init.check:
+// SIMD2-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG174]]
+// SIMD2-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG174]]
+// SIMD2:       init:
+// SIMD2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG176:![0-9]+]]
+// SIMD2-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// SIMD2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG177:![0-9]+]]
+// SIMD2:       invoke.cont:
+// SIMD2-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD2-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD2-NEXT:    br label [[INIT_END]], !dbg [[DBG174]]
+// SIMD2:       init.end:
+// SIMD2-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG178:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]]
+// SIMD2-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG180:![0-9]+]]
+// SIMD2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]]
+// SIMD2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG181]]
+// SIMD2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG181]]
+// SIMD2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG182:![0-9]+]]
+// SIMD2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]]
+// SIMD2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG183]]
+// SIMD2-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG183]]
+// SIMD2-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG184:![0-9]+]]
+// SIMD2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]]
+// SIMD2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG185]]
+// SIMD2-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG185]]
+// SIMD2-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG186:![0-9]+]]
+// SIMD2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG187:![0-9]+]]
+// SIMD2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]]
+// SIMD2-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG187]]
+// SIMD2-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG188:![0-9]+]]
+// SIMD2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG189:![0-9]+]]
+// SIMD2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]]
+// SIMD2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG189]]
+// SIMD2-NEXT:    [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG190:![0-9]+]]
+// SIMD2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG191:![0-9]+]]
+// SIMD2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG191]]
+// SIMD2-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG191]]
+// SIMD2-NEXT:    [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG192:![0-9]+]]
+// SIMD2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG192]]
+// SIMD2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG193:![0-9]+]]
+// SIMD2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG193]]
+// SIMD2-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG193]]
+// SIMD2-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG194:![0-9]+]]
+// SIMD2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG195:![0-9]+]]
+// SIMD2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG195]]
+// SIMD2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG195]]
+// SIMD2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]]
+// SIMD2-NEXT:    ret i32 [[TMP21]], !dbg [[DBG197:![0-9]+]]
+// SIMD2:       lpad:
+// SIMD2-NEXT:    [[TMP22:%.*]] = landingpad { i8*, i32 }
+// SIMD2-NEXT:    cleanup, !dbg [[DBG198:![0-9]+]]
+// SIMD2-NEXT:    [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0, !dbg [[DBG198]]
+// SIMD2-NEXT:    store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG198]]
+// SIMD2-NEXT:    [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG198]]
+// SIMD2-NEXT:    store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG198]]
+// SIMD2-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD2-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG174]]
+// SIMD2:       eh.resume:
+// SIMD2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG174]]
+// SIMD2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG174]]
+// SIMD2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG174]]
+// SIMD2-NEXT:    [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG174]]
+// SIMD2-NEXT:    resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG174]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// SIMD2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG199:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]]
+// SIMD2-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG205]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG206:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// SIMD2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG207:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG211:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_Z6foobarv
+// SIMD2-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG212:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG215:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]]
+// SIMD2-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG217:![0-9]+]]
+// SIMD2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]]
+// SIMD2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG218]]
+// SIMD2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG218]]
+// SIMD2-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG219:![0-9]+]]
+// SIMD2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG220:![0-9]+]]
+// SIMD2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG220]]
+// SIMD2-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG220]]
+// SIMD2-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG221:![0-9]+]]
+// SIMD2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG222:![0-9]+]]
+// SIMD2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]]
+// SIMD2-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG222]]
+// SIMD2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG223:![0-9]+]]
+// SIMD2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
+// SIMD2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]]
+// SIMD2-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG224]]
+// SIMD2-NEXT:    [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG225:![0-9]+]]
+// SIMD2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]]
+// SIMD2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG226]]
+// SIMD2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG226]]
+// SIMD2-NEXT:    [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG227:![0-9]+]]
+// SIMD2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG227]]
+// SIMD2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]]
+// SIMD2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG228]]
+// SIMD2-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG228]]
+// SIMD2-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG229:![0-9]+]]
+// SIMD2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]]
+// SIMD2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG230]]
+// SIMD2-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG230]]
+// SIMD2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]]
+// SIMD2-NEXT:    ret i32 [[TMP15]], !dbg [[DBG232:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// SIMD2-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG233:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234:![0-9]+]]
+// SIMD2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG234]]
+// SIMD2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG234]]
+// SIMD2:       init.check:
+// SIMD2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG235:![0-9]+]]
+// SIMD2-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG234]]
+// SIMD2-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234]]
+// SIMD2-NEXT:    br label [[INIT_END]], !dbg [[DBG234]]
+// SIMD2:       init.end:
+// SIMD2-NEXT:    ret void, !dbg [[DBG237:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// SIMD2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG238:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]]
+// SIMD2-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG244]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG245:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// SIMD2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG246:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG250:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// SIMD2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG251:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG256]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG258:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// SIMD2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG259:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]]
+// SIMD2-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG264:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG265:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// SIMD2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG266:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG271]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG273:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// SIMD2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG274:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]]
+// SIMD2-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG279:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG280:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// SIMD2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG281:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG286]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG288:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// SIMD2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG289:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]]
+// SIMD2-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG294:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG295:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// SIMD2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG296:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]]
+// SIMD2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]]
+// SIMD2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG301]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG303:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// SIMD2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG304:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]]
+// SIMD2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]]
+// SIMD2-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG309:![0-9]+]]
+// SIMD2-NEXT:    ret void, !dbg [[DBG310:![0-9]+]]
+//
+//
+// SIMD2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// SIMD2-SAME: () #[[ATTR0]] !dbg [[DBG311:![0-9]+]] {
+// SIMD2-NEXT:  entry:
+// SIMD2-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG313:![0-9]+]]
+// SIMD2-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG313]]
+// SIMD2-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG313]]
+// SIMD2-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK-TLS1-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK-TLS1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK-TLS1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK-TLS1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK-TLS1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK-TLS1-SAME: () #[[ATTR0]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK-TLS1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK-TLS1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK-TLS1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK-TLS1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// CHECK-TLS1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK-TLS1-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK-TLS1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK-TLS1:       invoke.cont:
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// CHECK-TLS1:       invoke.cont2:
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK-TLS1:       invoke.cont3:
+// CHECK-TLS1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// CHECK-TLS1:       invoke.cont7:
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// CHECK-TLS1:       invoke.cont8:
+// CHECK-TLS1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK-TLS1-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// CHECK-TLS1:       invoke.cont9:
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+// CHECK-TLS1:       lpad:
+// CHECK-TLS1-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS1-NEXT:    cleanup
+// CHECK-TLS1-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// CHECK-TLS1-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// CHECK-TLS1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// CHECK-TLS1-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS1-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK-TLS1:       arraydestroy.body:
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-TLS1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// CHECK-TLS1:       arraydestroy.done4:
+// CHECK-TLS1-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK-TLS1:       lpad6:
+// CHECK-TLS1-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS1-NEXT:    cleanup
+// CHECK-TLS1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// CHECK-TLS1-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// CHECK-TLS1-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// CHECK-TLS1-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS1-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// CHECK-TLS1:       arraydestroy.body11:
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// CHECK-TLS1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// CHECK-TLS1:       arraydestroy.done15:
+// CHECK-TLS1-NEXT:    br label [[EHCLEANUP]]
+// CHECK-TLS1:       ehcleanup:
+// CHECK-TLS1-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK-TLS1:       arraydestroy.body17:
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK-TLS1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK-TLS1:       arraydestroy.done21:
+// CHECK-TLS1-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK-TLS1:       eh.resume:
+// CHECK-TLS1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK-TLS1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK-TLS1-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK-TLS1-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK-TLS1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-TLS1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-TLS1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK-TLS1:       arraydestroy.body:
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-TLS1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// CHECK-TLS1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK-TLS1:       arraydestroy.done1:
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@main
+// CHECK-TLS1-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1
+// CHECK-TLS1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// CHECK-TLS1:       init.check:
+// CHECK-TLS1-NEXT:    [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-TLS1-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// CHECK-TLS1-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    store i8 1, i8* @_ZGVZ4mainE2sm, align 1
+// CHECK-TLS1-NEXT:    br label [[INIT_END]]
+// CHECK-TLS1:       init.end:
+// CHECK-TLS1-NEXT:    [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE()
+// CHECK-TLS1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP5]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8
+// CHECK-TLS1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A2]], align 4
+// CHECK-TLS1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK-TLS1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3()
+// CHECK-TLS1-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A5]], align 4
+// CHECK-TLS1-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x()
+// CHECK-TLS1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1
+// CHECK-TLS1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK-TLS1-NEXT:    [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A8]], align 4
+// CHECK-TLS1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP19:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// CHECK-TLS1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP21:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// CHECK-TLS1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP21]] to i32
+// CHECK-TLS1-NEXT:    [[TMP22:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[CONV]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP23:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE()
+// CHECK-TLS1-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP23]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[A12]], align 4
+// CHECK-TLS1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP24]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD13]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP26:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    ret i32 [[TMP26]]
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWL3gs1
+// CHECK-TLS1-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-TLS1-NEXT:    call void @_ZTHL3gs1()
+// CHECK-TLS1-NEXT:    ret %struct.S1* @_ZL3gs1
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK-TLS1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK-TLS1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWN6Static1sE
+// CHECK-TLS1-SAME: () #[[ATTR5]] comdat {
+// CHECK-TLS1-NEXT:    br i1 icmp ne (void ()* @_ZTHN6Static1sE, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS1:       1:
+// CHECK-TLS1-NEXT:    call void @_ZTHN6Static1sE()
+// CHECK-TLS1-NEXT:    br label [[TMP2]]
+// CHECK-TLS1:       2:
+// CHECK-TLS1-NEXT:    ret %struct.S3* @_ZN6Static1sE
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTW3gs3
+// CHECK-TLS1-SAME: () #[[ATTR5]] comdat {
+// CHECK-TLS1-NEXT:    br i1 icmp ne (void ()* @_ZTH3gs3, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS1:       1:
+// CHECK-TLS1-NEXT:    call void @_ZTH3gs3()
+// CHECK-TLS1-NEXT:    br label [[TMP2]]
+// CHECK-TLS1:       2:
+// CHECK-TLS1-NEXT:    ret %struct.S5* @gs3
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTW5arr_x
+// CHECK-TLS1-SAME: () #[[ATTR5]] comdat {
+// CHECK-TLS1-NEXT:    call void @_ZTH5arr_x()
+// CHECK-TLS1-NEXT:    ret [2 x [3 x %struct.S1]]* @arr_x
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE
+// CHECK-TLS1-SAME: () #[[ATTR5]] comdat {
+// CHECK-TLS1-NEXT:    call void @_ZTHN2STI2S4E2stE()
+// CHECK-TLS1-NEXT:    ret %struct.S4* @_ZN2STI2S4E2stE
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK-TLS1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK-TLS1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK-TLS1-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE()
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP1]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK-TLS1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK-TLS1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3()
+// CHECK-TLS1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK-TLS1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x()
+// CHECK-TLS1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1
+// CHECK-TLS1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK-TLS1-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK-TLS1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP13:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// CHECK-TLS1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP15:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// CHECK-TLS1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP15]] to i32
+// CHECK-TLS1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[CONV]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP17:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE()
+// CHECK-TLS1-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP17]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[A10]], align 4
+// CHECK-TLS1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP18]]
+// CHECK-TLS1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS1-NEXT:    ret i32 [[TMP20]]
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// CHECK-TLS1-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK-TLS1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// CHECK-TLS1:       init.check:
+// CHECK-TLS1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// CHECK-TLS1-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK-TLS1-NEXT:    br label [[INIT_END]]
+// CHECK-TLS1:       init.end:
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK-TLS1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK-TLS1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK-TLS1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK-TLS1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK-TLS1-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK-TLS1-SAME: () #[[ATTR0]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    call void @__cxx_global_var_init.1()
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS1-LABEL: define {{[^@]+}}@__tls_init
+// CHECK-TLS1-SAME: () #[[ATTR0]] {
+// CHECK-TLS1-NEXT:  entry:
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1
+// CHECK-TLS1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF2]]
+// CHECK-TLS1:       init:
+// CHECK-TLS1-NEXT:    store i8 1, i8* @__tls_guard, align 1
+// CHECK-TLS1-NEXT:    call void @__cxx_global_var_init()
+// CHECK-TLS1-NEXT:    call void @__cxx_global_var_init.2()
+// CHECK-TLS1-NEXT:    br label [[EXIT]]
+// CHECK-TLS1:       exit:
+// CHECK-TLS1-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@main
+// CHECK-TLS2-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1
+// CHECK-TLS2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// CHECK-TLS2:       init.check:
+// CHECK-TLS2-NEXT:    [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-TLS2-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// CHECK-TLS2-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR4:[0-9]+]]
+// CHECK-TLS2-NEXT:    store i8 1, i8* @_ZGVZ4mainE2sm, align 1
+// CHECK-TLS2-NEXT:    br label [[INIT_END]]
+// CHECK-TLS2:       init.end:
+// CHECK-TLS2-NEXT:    [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE()
+// CHECK-TLS2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP5]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8
+// CHECK-TLS2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A2]], align 4
+// CHECK-TLS2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK-TLS2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3()
+// CHECK-TLS2-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A5]], align 4
+// CHECK-TLS2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x()
+// CHECK-TLS2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1
+// CHECK-TLS2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK-TLS2-NEXT:    [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A8]], align 4
+// CHECK-TLS2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE()
+// CHECK-TLS2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
+// CHECK-TLS2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE()
+// CHECK-TLS2-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
+// CHECK-TLS2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP23]] to i32
+// CHECK-TLS2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE()
+// CHECK-TLS2-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP26:%.*]] = load i32, i32* [[A12]], align 4
+// CHECK-TLS2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD13]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    ret i32 [[TMP28]]
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWL3gs1
+// CHECK-TLS2-SAME: () #[[ATTR1:[0-9]+]] {
+// CHECK-TLS2-NEXT:    call void @_ZTHL3gs1()
+// CHECK-TLS2-NEXT:    ret %struct.S1* @_ZL3gs1
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK-TLS2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK-TLS2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN6Static1sE
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    br i1 icmp ne (void ()* @_ZTHN6Static1sE, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS2:       1:
+// CHECK-TLS2-NEXT:    call void @_ZTHN6Static1sE()
+// CHECK-TLS2-NEXT:    br label [[TMP2]]
+// CHECK-TLS2:       2:
+// CHECK-TLS2-NEXT:    ret %struct.S3* @_ZN6Static1sE
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTW3gs3
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    br i1 icmp ne (void ()* @_ZTH3gs3, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS2:       1:
+// CHECK-TLS2-NEXT:    call void @_ZTH3gs3()
+// CHECK-TLS2-NEXT:    br label [[TMP2]]
+// CHECK-TLS2:       2:
+// CHECK-TLS2-NEXT:    ret %struct.S5* @gs3
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTW5arr_x
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    call void @_ZTH5arr_x()
+// CHECK-TLS2-NEXT:    ret [2 x [3 x %struct.S1]]* @arr_x
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    ret i32* @_ZN2STIiE2stE
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    ret float* @_ZN2STIfE2stE
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE
+// CHECK-TLS2-SAME: () #[[ATTR1]] comdat {
+// CHECK-TLS2-NEXT:    call void @_ZTHN2STI2S4E2stE()
+// CHECK-TLS2-NEXT:    ret %struct.S4* @_ZN2STI2S4E2stE
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK-TLS2-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE()
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP1]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1()
+// CHECK-TLS2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK-TLS2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// CHECK-TLS2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3()
+// CHECK-TLS2-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A3]], align 4
+// CHECK-TLS2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x()
+// CHECK-TLS2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1
+// CHECK-TLS2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1
+// CHECK-TLS2-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[A6]], align 4
+// CHECK-TLS2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE()
+// CHECK-TLS2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK-TLS2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE()
+// CHECK-TLS2-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP16]], align 4
+// CHECK-TLS2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32
+// CHECK-TLS2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE()
+// CHECK-TLS2-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[A10]], align 4
+// CHECK-TLS2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// CHECK-TLS2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[RES]], align 4
+// CHECK-TLS2-NEXT:    ret i32 [[TMP22]]
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK-TLS2-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK-TLS2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK-TLS2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK-TLS2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK-TLS2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK-TLS2-SAME: () #[[ATTR6]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK-TLS2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK-TLS2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK-TLS2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK-TLS2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// CHECK-TLS2-SAME: () #[[ATTR6]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK-TLS2-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK-TLS2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// CHECK-TLS2:       invoke.cont:
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// CHECK-TLS2:       invoke.cont2:
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// CHECK-TLS2:       invoke.cont3:
+// CHECK-TLS2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// CHECK-TLS2:       invoke.cont7:
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// CHECK-TLS2:       invoke.cont8:
+// CHECK-TLS2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK-TLS2-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// CHECK-TLS2:       invoke.cont9:
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+// CHECK-TLS2:       lpad:
+// CHECK-TLS2-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS2-NEXT:    cleanup
+// CHECK-TLS2-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// CHECK-TLS2-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// CHECK-TLS2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// CHECK-TLS2-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS2-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK-TLS2:       arraydestroy.body:
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-TLS2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// CHECK-TLS2:       arraydestroy.done4:
+// CHECK-TLS2-NEXT:    br label [[EHCLEANUP:%.*]]
+// CHECK-TLS2:       lpad6:
+// CHECK-TLS2-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS2-NEXT:    cleanup
+// CHECK-TLS2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// CHECK-TLS2-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// CHECK-TLS2-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// CHECK-TLS2-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS2-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// CHECK-TLS2:       arraydestroy.body11:
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// CHECK-TLS2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// CHECK-TLS2:       arraydestroy.done15:
+// CHECK-TLS2-NEXT:    br label [[EHCLEANUP]]
+// CHECK-TLS2:       ehcleanup:
+// CHECK-TLS2-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// CHECK-TLS2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// CHECK-TLS2:       arraydestroy.body17:
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// CHECK-TLS2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// CHECK-TLS2:       arraydestroy.done21:
+// CHECK-TLS2-NEXT:    br label [[EH_RESUME:%.*]]
+// CHECK-TLS2:       eh.resume:
+// CHECK-TLS2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// CHECK-TLS2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK-TLS2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// CHECK-TLS2-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK-TLS2-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK-TLS2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR6]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-TLS2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-TLS2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK-TLS2:       arraydestroy.body:
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-TLS2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// CHECK-TLS2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK-TLS2:       arraydestroy.done1:
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK-TLS2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK-TLS2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    store i32 0, i32* [[A]], align 8
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// CHECK-TLS2-SAME: () #[[ATTR6]] comdat($_ZN2STI2S4E2stE) {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK-TLS2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// CHECK-TLS2:       init.check:
+// CHECK-TLS2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// CHECK-TLS2-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// CHECK-TLS2-NEXT:    br label [[INIT_END]]
+// CHECK-TLS2:       init.end:
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK-TLS2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK-TLS2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR4]]
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK-TLS2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-TLS2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK-TLS2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// CHECK-TLS2-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK-TLS2-SAME: () #[[ATTR6]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    call void @__cxx_global_var_init.1()
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS2-LABEL: define {{[^@]+}}@__tls_init
+// CHECK-TLS2-SAME: () #[[ATTR6]] {
+// CHECK-TLS2-NEXT:  entry:
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1
+// CHECK-TLS2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-TLS2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF2]]
+// CHECK-TLS2:       init:
+// CHECK-TLS2-NEXT:    store i8 1, i8* @__tls_guard, align 1
+// CHECK-TLS2-NEXT:    call void @__cxx_global_var_init()
+// CHECK-TLS2-NEXT:    call void @__cxx_global_var_init.2()
+// CHECK-TLS2-NEXT:    br label [[EXIT]]
+// CHECK-TLS2:       exit:
+// CHECK-TLS2-NEXT:    ret void
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK-TLS3-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG118:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG120:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG121:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK-TLS3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG129:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK-TLS3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG134:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK-TLS3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG135:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG140:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG141:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG140]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG142:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK-TLS3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG143:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG146:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG148:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG149:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG150:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG151:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG153:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG154:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK-TLS3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG155:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG161:![0-9]+]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG161]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG162:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK-TLS3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG163:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG166:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG167:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK-TLS3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG168:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG174:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG173]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG175:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK-TLS3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG176:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META177:![0-9]+]], metadata !DIExpression()), !dbg [[DBG178:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG179:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG181:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG182:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// CHECK-TLS3-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG183:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK-TLS3-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK-TLS3-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS3-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG184:![0-9]+]]
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG186:![0-9]+]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG187:![0-9]+]]
+// CHECK-TLS3:       invoke.cont:
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG188:![0-9]+]]
+// CHECK-TLS3:       invoke.cont2:
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG189:![0-9]+]]
+// CHECK-TLS3:       invoke.cont3:
+// CHECK-TLS3-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG190:![0-9]+]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG191:![0-9]+]]
+// CHECK-TLS3:       invoke.cont7:
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG192:![0-9]+]]
+// CHECK-TLS3:       invoke.cont8:
+// CHECK-TLS3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK-TLS3-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG193:![0-9]+]]
+// CHECK-TLS3:       invoke.cont9:
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG194:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG194]]
+// CHECK-TLS3:       lpad:
+// CHECK-TLS3-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS3-NEXT:    cleanup, !dbg [[DBG195:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG186]]
+// CHECK-TLS3:       arraydestroy.body:
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG186]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG186]]
+// CHECK-TLS3:       arraydestroy.done4:
+// CHECK-TLS3-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG186]]
+// CHECK-TLS3:       lpad6:
+// CHECK-TLS3-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS3-NEXT:    cleanup, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG195]]
+// CHECK-TLS3-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG190]]
+// CHECK-TLS3:       arraydestroy.body11:
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG190]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG190]]
+// CHECK-TLS3:       arraydestroy.done15:
+// CHECK-TLS3-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG190]]
+// CHECK-TLS3:       ehcleanup:
+// CHECK-TLS3-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG184]]
+// CHECK-TLS3:       arraydestroy.body17:
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG184]]
+// CHECK-TLS3:       arraydestroy.done21:
+// CHECK-TLS3-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG184]]
+// CHECK-TLS3:       eh.resume:
+// CHECK-TLS3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG184]]
+// CHECK-TLS3-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG184]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK-TLS3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG196:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-TLS3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201:![0-9]+]]
+// CHECK-TLS3-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG201]]
+// CHECK-TLS3:       arraydestroy.body:
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG201]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG201]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG201]]
+// CHECK-TLS3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG201]]
+// CHECK-TLS3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG201]]
+// CHECK-TLS3:       arraydestroy.done1:
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG201]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@main
+// CHECK-TLS3-SAME: () #[[ATTR5:[0-9]+]] !dbg [[DBG53:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG204:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG204]]
+// CHECK-TLS3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG204]], !prof [[PROF205:![0-9]+]]
+// CHECK-TLS3:       init.check:
+// CHECK-TLS3-NEXT:    [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG206:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0, !dbg [[DBG207:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG207]]
+// CHECK-TLS3-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]), !dbg [[DBG208:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG204]]
+// CHECK-TLS3-NEXT:    store i8 1, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG204]]
+// CHECK-TLS3-NEXT:    br label [[INIT_END]], !dbg [[DBG204]]
+// CHECK-TLS3:       init.end:
+// CHECK-TLS3-NEXT:    [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG209:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG210:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG210]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG211:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG212:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG213:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG213]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG213]]
+// CHECK-TLS3-NEXT:    [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG214:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG215:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG215]]
+// CHECK-TLS3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG216]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG216]]
+// CHECK-TLS3-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG217:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG218]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG218]]
+// CHECK-TLS3-NEXT:    [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG219:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG220:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG220]]
+// CHECK-TLS3-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG221:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG221]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG221]]
+// CHECK-TLS3-NEXT:    [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG222:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG222]]
+// CHECK-TLS3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG222]]
+// CHECK-TLS3-NEXT:    [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG223:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG223]]
+// CHECK-TLS3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG224]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG224]]
+// CHECK-TLS3-NEXT:    [[TMP19:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG225:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG226]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG226]]
+// CHECK-TLS3-NEXT:    [[TMP21:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG227:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[CONV:%.*]] = fptosi float [[TMP21]] to i32, !dbg [[DBG227]]
+// CHECK-TLS3-NEXT:    [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[CONV]], !dbg [[DBG228]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG228]]
+// CHECK-TLS3-NEXT:    [[TMP23:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG229:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP23]], i32 0, i32 0, !dbg [[DBG230:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP24:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG230]]
+// CHECK-TLS3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP24]], !dbg [[DBG231]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG231]]
+// CHECK-TLS3-NEXT:    [[TMP26:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG232:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret i32 [[TMP26]], !dbg [[DBG233:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWL3gs1
+// CHECK-TLS3-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK-TLS3-NEXT:    call void @_ZTHL3gs1()
+// CHECK-TLS3-NEXT:    ret %struct.S1* @_ZL3gs1
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK-TLS3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG234:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG240:![0-9]+]]
+// CHECK-TLS3-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG240]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG241:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK-TLS3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG242:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META243:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG245:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG246:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN6Static1sE
+// CHECK-TLS3-SAME: () #[[ATTR6]] comdat {
+// CHECK-TLS3-NEXT:    br i1 icmp ne (void ()* @_ZTHN6Static1sE, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS3:       1:
+// CHECK-TLS3-NEXT:    call void @_ZTHN6Static1sE()
+// CHECK-TLS3-NEXT:    br label [[TMP2]]
+// CHECK-TLS3:       2:
+// CHECK-TLS3-NEXT:    ret %struct.S3* @_ZN6Static1sE
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW3gs3
+// CHECK-TLS3-SAME: () #[[ATTR6]] comdat {
+// CHECK-TLS3-NEXT:    br i1 icmp ne (void ()* @_ZTH3gs3, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS3:       1:
+// CHECK-TLS3-NEXT:    call void @_ZTH3gs3()
+// CHECK-TLS3-NEXT:    br label [[TMP2]]
+// CHECK-TLS3:       2:
+// CHECK-TLS3-NEXT:    ret %struct.S5* @gs3
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW5arr_x
+// CHECK-TLS3-SAME: () #[[ATTR6]] comdat {
+// CHECK-TLS3-NEXT:    call void @_ZTH5arr_x()
+// CHECK-TLS3-NEXT:    ret [2 x [3 x %struct.S1]]* @arr_x
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE
+// CHECK-TLS3-SAME: () #[[ATTR6]] comdat {
+// CHECK-TLS3-NEXT:    call void @_ZTHN2STI2S4E2stE()
+// CHECK-TLS3-NEXT:    ret %struct.S4* @_ZN2STI2S4E2stE
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK-TLS3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG247:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG252:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG253:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG252]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG254:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK-TLS3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG255:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG260:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG261:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK-TLS3-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG262:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META263:![0-9]+]], metadata !DIExpression()), !dbg [[DBG264:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG265:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG266:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG266]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG267:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG268:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG269:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG269]]
+// CHECK-TLS3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG270:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG270]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG270]]
+// CHECK-TLS3-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG271:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG272:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG272]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG272]]
+// CHECK-TLS3-NEXT:    [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG273:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG274:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG274]]
+// CHECK-TLS3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG275:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG275]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG275]]
+// CHECK-TLS3-NEXT:    [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG276:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG276]]
+// CHECK-TLS3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG276]]
+// CHECK-TLS3-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG277]]
+// CHECK-TLS3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG278:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG278]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG278]]
+// CHECK-TLS3-NEXT:    [[TMP13:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG279:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG280:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG280]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG280]]
+// CHECK-TLS3-NEXT:    [[TMP15:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG281:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[CONV:%.*]] = fptosi float [[TMP15]] to i32, !dbg [[DBG281]]
+// CHECK-TLS3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG282:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[CONV]], !dbg [[DBG282]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG282]]
+// CHECK-TLS3-NEXT:    [[TMP17:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG283:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP17]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG284]]
+// CHECK-TLS3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG285:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP18]], !dbg [[DBG285]]
+// CHECK-TLS3-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG285]]
+// CHECK-TLS3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG286:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret i32 [[TMP20]], !dbg [[DBG287:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// CHECK-TLS3-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG288:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG289:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG289]]
+// CHECK-TLS3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG289]]
+// CHECK-TLS3:       init.check:
+// CHECK-TLS3-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG290:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG289]]
+// CHECK-TLS3-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG289]]
+// CHECK-TLS3-NEXT:    br label [[INIT_END]], !dbg [[DBG289]]
+// CHECK-TLS3:       init.end:
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG292:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK-TLS3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG293:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG296:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG299:![0-9]+]]
+// CHECK-TLS3-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG299]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG300:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK-TLS3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG301:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG304:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG305:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK-TLS3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG306:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META307:![0-9]+]], metadata !DIExpression()), !dbg [[DBG308:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG311:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG312:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG311]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG313:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK-TLS3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG314:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META315:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]]
+// CHECK-TLS3-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG319:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void, !dbg [[DBG320:![0-9]+]]
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG321:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG323:![0-9]+]]
+// CHECK-TLS3-NEXT:    ret void
+//
+//
+// CHECK-TLS3-LABEL: define {{[^@]+}}@__tls_init
+// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG324:![0-9]+]] {
+// CHECK-TLS3-NEXT:  entry:
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG325:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG325]]
+// CHECK-TLS3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG325]], !prof [[PROF205]]
+// CHECK-TLS3:       init:
+// CHECK-TLS3-NEXT:    store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG325]]
+// CHECK-TLS3-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG325]]
+// CHECK-TLS3-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG325]]
+// CHECK-TLS3-NEXT:    br label [[EXIT]], !dbg [[DBG325]]
+// CHECK-TLS3:       exit:
+// CHECK-TLS3-NEXT:    ret void
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@main
+// CHECK-TLS4-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG10:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i8, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG117:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG117]]
+// CHECK-TLS4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG117]], !prof [[PROF118:![0-9]+]]
+// CHECK-TLS4:       init.check:
+// CHECK-TLS4-NEXT:    [[TMP1:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG119:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP1]], i32 0, i32 0, !dbg [[DBG120:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG120]]
+// CHECK-TLS4-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]]), !dbg [[DBG121:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR5:[0-9]+]], !dbg [[DBG117]]
+// CHECK-TLS4-NEXT:    store i8 1, i8* @_ZGVZ4mainE2sm, align 1, !dbg [[DBG117]]
+// CHECK-TLS4-NEXT:    br label [[INIT_END]], !dbg [[DBG117]]
+// CHECK-TLS4:       init.end:
+// CHECK-TLS4-NEXT:    [[TMP4:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG122:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG123:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG123]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG124:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG125:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG126:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG126]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG126]]
+// CHECK-TLS4-NEXT:    [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG127:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG128:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG128]]
+// CHECK-TLS4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG129:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG129]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG129]]
+// CHECK-TLS4-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG130:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG131:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG131]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG131]]
+// CHECK-TLS4-NEXT:    [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG132:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG133:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG133]]
+// CHECK-TLS4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG134:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG134]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG134]]
+// CHECK-TLS4-NEXT:    [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG135:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG135]]
+// CHECK-TLS4-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG135]]
+// CHECK-TLS4-NEXT:    [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG136:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG136]]
+// CHECK-TLS4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG137:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG137]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG137]]
+// CHECK-TLS4-NEXT:    [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG138:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG138]]
+// CHECK-TLS4-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG139:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG139]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG139]]
+// CHECK-TLS4-NEXT:    [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG140:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4, !dbg [[DBG140]]
+// CHECK-TLS4-NEXT:    [[CONV:%.*]] = fptosi float [[TMP23]] to i32, !dbg [[DBG140]]
+// CHECK-TLS4-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG141:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]], !dbg [[DBG141]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG141]]
+// CHECK-TLS4-NEXT:    [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG142:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0, !dbg [[DBG143:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP26:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG143]]
+// CHECK-TLS4-NEXT:    [[TMP27:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG144:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP27]], [[TMP26]], !dbg [[DBG144]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG144]]
+// CHECK-TLS4-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG145:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret i32 [[TMP28]], !dbg [[DBG146:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWL3gs1
+// CHECK-TLS4-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-TLS4-NEXT:    call void @_ZTHL3gs1()
+// CHECK-TLS4-NEXT:    ret %struct.S1* @_ZL3gs1
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// CHECK-TLS4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG147:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG153:![0-9]+]]
+// CHECK-TLS4-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG153]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG154:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// CHECK-TLS4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG155:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG158:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG159:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN6Static1sE
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    br i1 icmp ne (void ()* @_ZTHN6Static1sE, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS4:       1:
+// CHECK-TLS4-NEXT:    call void @_ZTHN6Static1sE()
+// CHECK-TLS4-NEXT:    br label [[TMP2]]
+// CHECK-TLS4:       2:
+// CHECK-TLS4-NEXT:    ret %struct.S3* @_ZN6Static1sE
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW3gs3
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    br i1 icmp ne (void ()* @_ZTH3gs3, void ()* null), label [[TMP1:%.*]], label [[TMP2:%.*]]
+// CHECK-TLS4:       1:
+// CHECK-TLS4-NEXT:    call void @_ZTH3gs3()
+// CHECK-TLS4-NEXT:    br label [[TMP2]]
+// CHECK-TLS4:       2:
+// CHECK-TLS4-NEXT:    ret %struct.S5* @gs3
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW5arr_x
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    call void @_ZTH5arr_x()
+// CHECK-TLS4-NEXT:    ret [2 x [3 x %struct.S1]]* @arr_x
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    ret i32* @_ZN2STIiE2stE
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    ret float* @_ZN2STIfE2stE
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE
+// CHECK-TLS4-SAME: () #[[ATTR2]] comdat {
+// CHECK-TLS4-NEXT:    call void @_ZTHN2STI2S4E2stE()
+// CHECK-TLS4-NEXT:    ret %struct.S4* @_ZN2STI2S4E2stE
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_Z6foobarv
+// CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG160:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG163:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG164:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG164]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG165:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG166:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG167:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG167]]
+// CHECK-TLS4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG168:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG168]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG168]]
+// CHECK-TLS4-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG169:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG170:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG170]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG170]]
+// CHECK-TLS4-NEXT:    [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG171:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG172:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG172]]
+// CHECK-TLS4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG173:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG173]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG173]]
+// CHECK-TLS4-NEXT:    [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG174:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG174]]
+// CHECK-TLS4-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG174]]
+// CHECK-TLS4-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG175]]
+// CHECK-TLS4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG176:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG176]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG176]]
+// CHECK-TLS4-NEXT:    [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG177:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG177]]
+// CHECK-TLS4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG178:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG178]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG178]]
+// CHECK-TLS4-NEXT:    [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG179:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG179]]
+// CHECK-TLS4-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG179]]
+// CHECK-TLS4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG180:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG180]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG180]]
+// CHECK-TLS4-NEXT:    [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG181:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG182:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG182]]
+// CHECK-TLS4-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG183]]
+// CHECK-TLS4-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG183]]
+// CHECK-TLS4-NEXT:    [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret i32 [[TMP22]], !dbg [[DBG185:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// CHECK-TLS4-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG186:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG189:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG191:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG192:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// CHECK-TLS4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG193:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META194:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG198:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG199:![0-9]+]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG199]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG200:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// CHECK-TLS4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG201:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR5]], !dbg [[DBG204:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG205:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// CHECK-TLS4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG206:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META207:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG211:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG212:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG211]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG213:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// CHECK-TLS4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG214:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG216:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG219:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG220:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG221:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG222:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG224:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG225:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// CHECK-TLS4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG226:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG232:![0-9]+]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG232]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG233:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// CHECK-TLS4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG234:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR5]], !dbg [[DBG237:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG238:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// CHECK-TLS4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG239:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG244:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG244]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG246:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// CHECK-TLS4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG247:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG250:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG252:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG253:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// CHECK-TLS4-SAME: () #[[ATTR7]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG254:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// CHECK-TLS4-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// CHECK-TLS4-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// CHECK-TLS4-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG255:![0-9]+]]
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG257:![0-9]+]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG258:![0-9]+]]
+// CHECK-TLS4:       invoke.cont:
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG259:![0-9]+]]
+// CHECK-TLS4:       invoke.cont2:
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG260:![0-9]+]]
+// CHECK-TLS4:       invoke.cont3:
+// CHECK-TLS4-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG261:![0-9]+]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG262:![0-9]+]]
+// CHECK-TLS4:       invoke.cont7:
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG263:![0-9]+]]
+// CHECK-TLS4:       invoke.cont8:
+// CHECK-TLS4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// CHECK-TLS4-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG264:![0-9]+]]
+// CHECK-TLS4:       invoke.cont9:
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG265:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG265]]
+// CHECK-TLS4:       lpad:
+// CHECK-TLS4-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS4-NEXT:    cleanup, !dbg [[DBG266:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG257]]
+// CHECK-TLS4:       arraydestroy.body:
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG257]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG257]]
+// CHECK-TLS4:       arraydestroy.done4:
+// CHECK-TLS4-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG257]]
+// CHECK-TLS4:       lpad6:
+// CHECK-TLS4-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// CHECK-TLS4-NEXT:    cleanup, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG266]]
+// CHECK-TLS4-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG261]]
+// CHECK-TLS4:       arraydestroy.body11:
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR5]], !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG261]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG261]]
+// CHECK-TLS4:       arraydestroy.done15:
+// CHECK-TLS4-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG261]]
+// CHECK-TLS4:       ehcleanup:
+// CHECK-TLS4-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG255]]
+// CHECK-TLS4:       arraydestroy.body17:
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR5]], !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG255]]
+// CHECK-TLS4:       arraydestroy.done21:
+// CHECK-TLS4-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG255]]
+// CHECK-TLS4:       eh.resume:
+// CHECK-TLS4-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG255]]
+// CHECK-TLS4-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG255]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// CHECK-TLS4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR7]] !dbg [[DBG267:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-TLS4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]]
+// CHECK-TLS4-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG272]]
+// CHECK-TLS4:       arraydestroy.body:
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG272]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG272]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG272]]
+// CHECK-TLS4-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG272]]
+// CHECK-TLS4-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG272]]
+// CHECK-TLS4:       arraydestroy.done1:
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG272]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// CHECK-TLS4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG273:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG278:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG279:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG278]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG280:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// CHECK-TLS4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG281:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// CHECK-TLS4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG286:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG287:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// CHECK-TLS4-SAME: () #[[ATTR7]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG288:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG289:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG289]]
+// CHECK-TLS4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG289]]
+// CHECK-TLS4:       init.check:
+// CHECK-TLS4-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG290:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG289]]
+// CHECK-TLS4-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG289]]
+// CHECK-TLS4-NEXT:    br label [[INIT_END]], !dbg [[DBG289]]
+// CHECK-TLS4:       init.end:
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG292:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// CHECK-TLS4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG293:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG296:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG299:![0-9]+]]
+// CHECK-TLS4-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG299]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG300:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// CHECK-TLS4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG301:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR5]], !dbg [[DBG304:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG305:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// CHECK-TLS4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG306:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-TLS4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META307:![0-9]+]], metadata !DIExpression()), !dbg [[DBG308:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG311:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG312:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG311]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG313:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// CHECK-TLS4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG314:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// CHECK-TLS4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META315:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// CHECK-TLS4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]]
+// CHECK-TLS4-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG319:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void, !dbg [[DBG320:![0-9]+]]
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG321:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG323:![0-9]+]]
+// CHECK-TLS4-NEXT:    ret void
+//
+//
+// CHECK-TLS4-LABEL: define {{[^@]+}}@__tls_init
+// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG324:![0-9]+]] {
+// CHECK-TLS4-NEXT:  entry:
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG325:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG325]]
+// CHECK-TLS4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG325]], !prof [[PROF118]]
+// CHECK-TLS4:       init:
+// CHECK-TLS4-NEXT:    store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG325]]
+// CHECK-TLS4-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG325]]
+// CHECK-TLS4-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG325]]
+// CHECK-TLS4-NEXT:    br label [[EXIT]], !dbg [[DBG325]]
+// CHECK-TLS4:       exit:
+// CHECK-TLS4-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// SIMD3-SAME: () #[[ATTR0:[0-9]+]] {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5)
+// SIMD3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// SIMD3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]])
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// SIMD3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// SIMD3-SAME: () #[[ATTR0]] {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27)
+// SIMD3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// SIMD3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]])
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// SIMD3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// SIMD3-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// SIMD3-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD3-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// SIMD3-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// SIMD3:       invoke.cont:
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// SIMD3-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+// SIMD3:       invoke.cont2:
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// SIMD3-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+// SIMD3:       invoke.cont3:
+// SIMD3-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// SIMD3-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]]
+// SIMD3:       invoke.cont7:
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// SIMD3-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]]
+// SIMD3:       invoke.cont8:
+// SIMD3-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD3-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// SIMD3-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]]
+// SIMD3:       invoke.cont9:
+// SIMD3-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+// SIMD3:       lpad:
+// SIMD3-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// SIMD3-NEXT:    cleanup
+// SIMD3-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+// SIMD3-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8
+// SIMD3-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
+// SIMD3-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD3-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8
+// SIMD3-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]]
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]]
+// SIMD3:       arraydestroy.body:
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// SIMD3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// SIMD3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]]
+// SIMD3:       arraydestroy.done4:
+// SIMD3-NEXT:    br label [[EHCLEANUP:%.*]]
+// SIMD3:       lpad6:
+// SIMD3-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// SIMD3-NEXT:    cleanup
+// SIMD3-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0
+// SIMD3-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8
+// SIMD3-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1
+// SIMD3-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD3-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8
+// SIMD3-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]]
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]]
+// SIMD3:       arraydestroy.body11:
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ]
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1
+// SIMD3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]]
+// SIMD3-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0)
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]]
+// SIMD3:       arraydestroy.done15:
+// SIMD3-NEXT:    br label [[EHCLEANUP]]
+// SIMD3:       ehcleanup:
+// SIMD3-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8
+// SIMD3-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0
+// SIMD3-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]]
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]]
+// SIMD3:       arraydestroy.body17:
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ]
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1
+// SIMD3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]]
+// SIMD3-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0)
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]]
+// SIMD3:       arraydestroy.done21:
+// SIMD3-NEXT:    br label [[EH_RESUME:%.*]]
+// SIMD3:       eh.resume:
+// SIMD3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// SIMD3-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD3-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// SIMD3-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// SIMD3-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]]
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// SIMD3-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// SIMD3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// SIMD3-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
+// SIMD3:       arraydestroy.body:
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// SIMD3-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// SIMD3-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// SIMD3-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0)
+// SIMD3-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// SIMD3:       arraydestroy.done1:
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@main
+// SIMD3-SAME: () #[[ATTR4:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD3-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// SIMD3-NEXT:    [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8
+// SIMD3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// SIMD3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF2:![0-9]+]]
+// SIMD3:       init.check:
+// SIMD3-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// SIMD3-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]]
+// SIMD3:       init:
+// SIMD3-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD3-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// SIMD3-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+// SIMD3:       invoke.cont:
+// SIMD3-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD3-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD3-NEXT:    br label [[INIT_END]]
+// SIMD3:       init.end:
+// SIMD3-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4
+// SIMD3-NEXT:    store i32 [[TMP4]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8
+// SIMD3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// SIMD3-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+// SIMD3-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// SIMD3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// SIMD3-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// SIMD3-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4
+// SIMD3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// SIMD3-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// SIMD3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+// SIMD3-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// SIMD3-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32
+// SIMD3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]]
+// SIMD3-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// SIMD3-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    ret i32 [[TMP21]]
+// SIMD3:       lpad:
+// SIMD3-NEXT:    [[TMP22:%.*]] = landingpad { i8*, i32 }
+// SIMD3-NEXT:    cleanup
+// SIMD3-NEXT:    [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0
+// SIMD3-NEXT:    store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8
+// SIMD3-NEXT:    [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1
+// SIMD3-NEXT:    store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD3-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]]
+// SIMD3-NEXT:    br label [[EH_RESUME:%.*]]
+// SIMD3:       eh.resume:
+// SIMD3-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
+// SIMD3-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
+// SIMD3-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
+// SIMD3-NEXT:    [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// SIMD3-NEXT:    resume { i8*, i32 } [[LPAD_VAL8]]
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// SIMD3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]])
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// SIMD3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_Z6foobarv
+// SIMD3-SAME: () #[[ATTR5:[0-9]+]] {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4
+// SIMD3-NEXT:    store i32 [[TMP0]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+// SIMD3-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8
+// SIMD3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// SIMD3-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// SIMD3-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4
+// SIMD3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+// SIMD3-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4
+// SIMD3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// SIMD3-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4
+// SIMD3-NEXT:    [[CONV:%.*]] = fptosi float [[TMP11]] to i32
+// SIMD3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]]
+// SIMD3-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4
+// SIMD3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// SIMD3-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4
+// SIMD3-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4
+// SIMD3-NEXT:    ret i32 [[TMP15]]
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// SIMD3-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// SIMD3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// SIMD3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]]
+// SIMD3:       init.check:
+// SIMD3-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23)
+// SIMD3-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]]
+// SIMD3-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8
+// SIMD3-NEXT:    br label [[INIT_END]]
+// SIMD3:       init.end:
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// SIMD3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]])
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// SIMD3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// SIMD3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// SIMD3-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    store i32 0, i32* [[A]], align 4
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// SIMD3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// SIMD3-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD3-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    store i32 0, i32* [[A]], align 8
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// SIMD3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// SIMD3-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD3-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    store i32 0, i32* [[A]], align 8
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// SIMD3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// SIMD3-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// SIMD3-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD3-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0
+// SIMD3-NEXT:    store i32 0, i32* [[A]], align 4
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// SIMD3-SAME: () #[[ATTR0]] {
+// SIMD3-NEXT:  entry:
+// SIMD3-NEXT:    call void @__cxx_global_var_init()
+// SIMD3-NEXT:    call void @__cxx_global_var_init.1()
+// SIMD3-NEXT:    call void @__cxx_global_var_init.2()
+// SIMD3-NEXT:    ret void
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// SIMD4-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG118:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG120:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG121:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// SIMD4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
+// SIMD4-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG129:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// SIMD4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG134:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG135:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG136:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG138:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG139:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// SIMD4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG140:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]]
+// SIMD4-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG146]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG147:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// SIMD4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG148:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG152:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// SIMD4-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG153:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// SIMD4-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD4-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154:![0-9]+]]
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156:![0-9]+]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// SIMD4-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG157:![0-9]+]]
+// SIMD4:       invoke.cont:
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// SIMD4-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG158:![0-9]+]]
+// SIMD4:       invoke.cont2:
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// SIMD4-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG159:![0-9]+]]
+// SIMD4:       invoke.cont3:
+// SIMD4-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]]
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160:![0-9]+]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// SIMD4-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG161:![0-9]+]]
+// SIMD4:       invoke.cont7:
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// SIMD4-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG162:![0-9]+]]
+// SIMD4:       invoke.cont8:
+// SIMD4-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD4-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// SIMD4-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG163:![0-9]+]]
+// SIMD4:       invoke.cont9:
+// SIMD4-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG164:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG164]]
+// SIMD4:       lpad:
+// SIMD4-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// SIMD4-NEXT:    cleanup, !dbg [[DBG165:![0-9]+]]
+// SIMD4-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG165]]
+// SIMD4-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]]
+// SIMD4-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG165]]
+// SIMD4-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]]
+// SIMD4-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG156]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG156]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]]
+// SIMD4:       arraydestroy.body:
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]]
+// SIMD4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG156]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG156]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]]
+// SIMD4:       arraydestroy.done4:
+// SIMD4-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG156]]
+// SIMD4:       lpad6:
+// SIMD4-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// SIMD4-NEXT:    cleanup, !dbg [[DBG165]]
+// SIMD4-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG165]]
+// SIMD4-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG165]]
+// SIMD4-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG165]]
+// SIMD4-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG165]]
+// SIMD4-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG160]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG160]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG160]]
+// SIMD4:       arraydestroy.body11:
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG160]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG160]]
+// SIMD4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG160]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG160]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG160]]
+// SIMD4:       arraydestroy.done15:
+// SIMD4-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG160]]
+// SIMD4:       ehcleanup:
+// SIMD4-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG154]]
+// SIMD4-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG154]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG154]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG154]]
+// SIMD4:       arraydestroy.body17:
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG154]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG154]]
+// SIMD4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG154]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG154]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG154]]
+// SIMD4:       arraydestroy.done21:
+// SIMD4-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG154]]
+// SIMD4:       eh.resume:
+// SIMD4-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG154]]
+// SIMD4-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG154]]
+// SIMD4-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG154]]
+// SIMD4-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG154]]
+// SIMD4-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG154]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// SIMD4-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG166:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// SIMD4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]]
+// SIMD4-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]]
+// SIMD4:       arraydestroy.body:
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]]
+// SIMD4-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]]
+// SIMD4-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG171]]
+// SIMD4-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]]
+// SIMD4:       arraydestroy.done1:
+// SIMD4-NEXT:    ret void, !dbg [[DBG171]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@main
+// SIMD4-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// SIMD4-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG174:![0-9]+]]
+// SIMD4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]]
+// SIMD4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]]
+// SIMD4:       init.check:
+// SIMD4-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0, !dbg [[DBG174]]
+// SIMD4-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG174]]
+// SIMD4:       init:
+// SIMD4-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG176:![0-9]+]]
+// SIMD4-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP2]])
+// SIMD4-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG177:![0-9]+]]
+// SIMD4:       invoke.cont:
+// SIMD4-NEXT:    [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD4-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD4-NEXT:    br label [[INIT_END]], !dbg [[DBG174]]
+// SIMD4:       init.end:
+// SIMD4-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG178:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP4]], i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]]
+// SIMD4-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG180:![0-9]+]]
+// SIMD4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]]
+// SIMD4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG181]]
+// SIMD4-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG181]]
+// SIMD4-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG182:![0-9]+]]
+// SIMD4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG183:![0-9]+]]
+// SIMD4-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG183]]
+// SIMD4-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG183]]
+// SIMD4-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG184:![0-9]+]]
+// SIMD4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]]
+// SIMD4-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG185]]
+// SIMD4-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG185]]
+// SIMD4-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG186:![0-9]+]]
+// SIMD4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG187:![0-9]+]]
+// SIMD4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]]
+// SIMD4-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG187]]
+// SIMD4-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG188:![0-9]+]]
+// SIMD4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG189:![0-9]+]]
+// SIMD4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]]
+// SIMD4-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG189]]
+// SIMD4-NEXT:    [[TMP15:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG190:![0-9]+]]
+// SIMD4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG191:![0-9]+]]
+// SIMD4-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG191]]
+// SIMD4-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG191]]
+// SIMD4-NEXT:    [[TMP17:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG192:![0-9]+]]
+// SIMD4-NEXT:    [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG192]]
+// SIMD4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG193:![0-9]+]]
+// SIMD4-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG193]]
+// SIMD4-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG193]]
+// SIMD4-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG194:![0-9]+]]
+// SIMD4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG195:![0-9]+]]
+// SIMD4-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG195]]
+// SIMD4-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG195]]
+// SIMD4-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG196:![0-9]+]]
+// SIMD4-NEXT:    ret i32 [[TMP21]], !dbg [[DBG197:![0-9]+]]
+// SIMD4:       lpad:
+// SIMD4-NEXT:    [[TMP22:%.*]] = landingpad { i8*, i32 }
+// SIMD4-NEXT:    cleanup, !dbg [[DBG198:![0-9]+]]
+// SIMD4-NEXT:    [[TMP23:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 0, !dbg [[DBG198]]
+// SIMD4-NEXT:    store i8* [[TMP23]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG198]]
+// SIMD4-NEXT:    [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP22]], 1, !dbg [[DBG198]]
+// SIMD4-NEXT:    store i32 [[TMP24]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG198]]
+// SIMD4-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG174]]
+// SIMD4-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG174]]
+// SIMD4:       eh.resume:
+// SIMD4-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG174]]
+// SIMD4-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG174]]
+// SIMD4-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG174]]
+// SIMD4-NEXT:    [[LPAD_VAL8:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG174]]
+// SIMD4-NEXT:    resume { i8*, i32 } [[LPAD_VAL8]], !dbg [[DBG174]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// SIMD4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG199:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]]
+// SIMD4-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG205]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG206:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// SIMD4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG207:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG211:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_Z6foobarv
+// SIMD4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG212:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S3:%.*]], %struct.S3* @_ZN6Static1sE, i32 0, i32 0), align 4, !dbg [[DBG215:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP0]], i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]]
+// SIMD4-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* @_ZL3gs1, i32 0, i32 0), align 4, !dbg [[DBG217:![0-9]+]]
+// SIMD4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]]
+// SIMD4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]], !dbg [[DBG218]]
+// SIMD4-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG218]]
+// SIMD4-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG219:![0-9]+]]
+// SIMD4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG220:![0-9]+]]
+// SIMD4-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG220]]
+// SIMD4-NEXT:    store i32 [[ADD1]], i32* [[RES]], align 4, !dbg [[DBG220]]
+// SIMD4-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S5:%.*]], %struct.S5* @gs3, i32 0, i32 0), align 4, !dbg [[DBG221:![0-9]+]]
+// SIMD4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG222:![0-9]+]]
+// SIMD4-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]]
+// SIMD4-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG222]]
+// SIMD4-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1, i32 0), align 4, !dbg [[DBG223:![0-9]+]]
+// SIMD4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
+// SIMD4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]]
+// SIMD4-NEXT:    store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG224]]
+// SIMD4-NEXT:    [[TMP9:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG225:![0-9]+]]
+// SIMD4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]]
+// SIMD4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG226]]
+// SIMD4-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG226]]
+// SIMD4-NEXT:    [[TMP11:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG227:![0-9]+]]
+// SIMD4-NEXT:    [[CONV:%.*]] = fptosi float [[TMP11]] to i32, !dbg [[DBG227]]
+// SIMD4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]]
+// SIMD4-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP12]], [[CONV]], !dbg [[DBG228]]
+// SIMD4-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG228]]
+// SIMD4-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S4:%.*]], %struct.S4* @_ZN2STI2S4E2stE, i32 0, i32 0), align 4, !dbg [[DBG229:![0-9]+]]
+// SIMD4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]]
+// SIMD4-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG230]]
+// SIMD4-NEXT:    store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG230]]
+// SIMD4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]]
+// SIMD4-NEXT:    ret i32 [[TMP15]], !dbg [[DBG232:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
+// SIMD4-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG233:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234:![0-9]+]]
+// SIMD4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG234]]
+// SIMD4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG234]]
+// SIMD4:       init.check:
+// SIMD4-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG235:![0-9]+]]
+// SIMD4-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG234]]
+// SIMD4-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG234]]
+// SIMD4-NEXT:    br label [[INIT_END]], !dbg [[DBG234]]
+// SIMD4:       init.end:
+// SIMD4-NEXT:    ret void, !dbg [[DBG237:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// SIMD4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG238:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]]
+// SIMD4-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG244]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG245:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// SIMD4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG246:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG250:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// SIMD4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG251:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG256]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG258:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// SIMD4-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG259:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD4-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]]
+// SIMD4-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG264:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG265:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// SIMD4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG266:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG271]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG273:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// SIMD4-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG274:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// SIMD4-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]]
+// SIMD4-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG279:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG280:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// SIMD4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG281:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG286]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG288:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// SIMD4-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG289:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// SIMD4-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]]
+// SIMD4-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG294:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG295:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// SIMD4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG296:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// SIMD4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]]
+// SIMD4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]]
+// SIMD4-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG301]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG303:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// SIMD4-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG304:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// SIMD4-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]]
+// SIMD4-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// SIMD4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]]
+// SIMD4-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG309:![0-9]+]]
+// SIMD4-NEXT:    ret void, !dbg [[DBG310:![0-9]+]]
+//
+//
+// SIMD4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// SIMD4-SAME: () #[[ATTR0]] !dbg [[DBG311:![0-9]+]] {
+// SIMD4-NEXT:  entry:
+// SIMD4-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG313:![0-9]+]]
+// SIMD4-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG313]]
+// SIMD4-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG313]]
+// SIMD4-NEXT:    ret void
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG120:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG120]]
+// DEBUG1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[TMP2]], i32 5), !dbg [[DBG121:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG120]]
+// DEBUG1-NEXT:    ret i8* [[TMP3]], !dbg [[DBG120]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// DEBUG1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG122:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
+// DEBUG1-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG128]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG129:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG130:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG132]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG132]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG132]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG133:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// DEBUG1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG134:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG137:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG138:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_.
+// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG139:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG140:![0-9]+]]
+// DEBUG1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG140]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG140]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG141:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT2:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT9:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG144:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG144]]
+// DEBUG1-NEXT:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG145:![0-9]+]]
+// DEBUG1-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG146:![0-9]+]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN1]], i32 1)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG147:![0-9]+]]
+// DEBUG1:       invoke.cont:
+// DEBUG1-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1, !dbg [[DBG146]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG148:![0-9]+]]
+// DEBUG1:       invoke.cont3:
+// DEBUG1-NEXT:    [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1, !dbg [[DBG146]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT4]], i32 3)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]], !dbg [[DBG149:![0-9]+]]
+// DEBUG1:       invoke.cont5:
+// DEBUG1-NEXT:    [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1, !dbg [[DBG145]]
+// DEBUG1-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0, !dbg [[DBG150:![0-9]+]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN8]], i32 4)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]], !dbg [[DBG151:![0-9]+]]
+// DEBUG1:       invoke.cont11:
+// DEBUG1-NEXT:    [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1, !dbg [[DBG150]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT12]], i32 5)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]], !dbg [[DBG152:![0-9]+]]
+// DEBUG1:       invoke.cont13:
+// DEBUG1-NEXT:    [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1, !dbg [[DBG150]]
+// DEBUG1-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT14]], i32 6)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]], !dbg [[DBG153:![0-9]+]]
+// DEBUG1:       invoke.cont15:
+// DEBUG1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG144]]
+// DEBUG1-NEXT:    ret i8* [[TMP3]], !dbg [[DBG144]]
+// DEBUG1:       lpad:
+// DEBUG1-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    cleanup, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG143]]
+// DEBUG1-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG143]]
+// DEBUG1-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG146]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]], !dbg [[DBG146]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG146]]
+// DEBUG1:       arraydestroy.body:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG146]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG146]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG146]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]], !dbg [[DBG146]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG146]]
+// DEBUG1:       arraydestroy.done6:
+// DEBUG1-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG146]]
+// DEBUG1:       lpad10:
+// DEBUG1-NEXT:    [[TMP8:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    cleanup, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0, !dbg [[DBG143]]
+// DEBUG1-NEXT:    store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1, !dbg [[DBG143]]
+// DEBUG1-NEXT:    store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG143]]
+// DEBUG1-NEXT:    [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG150]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]], !dbg [[DBG150]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG150]]
+// DEBUG1:       arraydestroy.body17:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG150]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG150]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG150]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]], !dbg [[DBG150]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG150]]
+// DEBUG1:       arraydestroy.done21:
+// DEBUG1-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG150]]
+// DEBUG1:       ehcleanup:
+// DEBUG1-NEXT:    [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]], !dbg [[DBG145]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]], !dbg [[DBG145]]
+// DEBUG1:       arraydestroy.body23:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ], !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1, !dbg [[DBG145]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR4]], !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]], !dbg [[DBG145]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]], !dbg [[DBG145]]
+// DEBUG1:       arraydestroy.done27:
+// DEBUG1-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG145]]
+// DEBUG1:       eh.resume:
+// DEBUG1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG145]]
+// DEBUG1-NEXT:    [[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG145]]
+// DEBUG1-NEXT:    resume { i8*, i32 } [[LPAD_VAL28]], !dbg [[DBG145]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG154:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG156]]
+// DEBUG1-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG156]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6, !dbg [[DBG156]]
+// DEBUG1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG156]]
+// DEBUG1:       arraydestroy.body:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG156]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG156]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG156]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG156]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG156]]
+// DEBUG1:       arraydestroy.done1:
+// DEBUG1-NEXT:    ret void, !dbg [[DBG157:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3
+// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG158:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG159:![0-9]+]]
+// DEBUG1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG159]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG159]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG160:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG163:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG165:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG166:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// DEBUG1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG167:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG172:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG173:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG172]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG174:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// DEBUG1-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG175:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG178:![0-9]+]]
+// DEBUG1-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG180:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG181:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
+// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG182:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG183:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG185:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG186:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// DEBUG1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG187:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG190:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META191:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG193:![0-9]+]]
+// DEBUG1-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG193]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG194:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// DEBUG1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG195:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR4]], !dbg [[DBG198:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG199:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// DEBUG1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG200:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG206:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG205]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG207:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// DEBUG1-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG208:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG1-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG211:![0-9]+]]
+// DEBUG1-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG213:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG214:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_var_init.5
+// DEBUG1-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG215:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// DEBUG1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG216:![0-9]+]]
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG218:![0-9]+]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG219:![0-9]+]]
+// DEBUG1:       invoke.cont:
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG218]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG220:![0-9]+]]
+// DEBUG1:       invoke.cont2:
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG218]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG221:![0-9]+]]
+// DEBUG1:       invoke.cont3:
+// DEBUG1-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG216]]
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG222:![0-9]+]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG223:![0-9]+]]
+// DEBUG1:       invoke.cont7:
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG222]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG224:![0-9]+]]
+// DEBUG1:       invoke.cont8:
+// DEBUG1-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG222]]
+// DEBUG1-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// DEBUG1-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG225:![0-9]+]]
+// DEBUG1:       invoke.cont9:
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG226:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG226]]
+// DEBUG1:       lpad:
+// DEBUG1-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    cleanup, !dbg [[DBG227:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG227]]
+// DEBUG1-NEXT:    store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG227]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG227]]
+// DEBUG1-NEXT:    store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG227]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG218]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG218]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG218]]
+// DEBUG1:       arraydestroy.body:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG218]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG218]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG218]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG218]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG218]]
+// DEBUG1:       arraydestroy.done4:
+// DEBUG1-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG218]]
+// DEBUG1:       lpad6:
+// DEBUG1-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    cleanup, !dbg [[DBG227]]
+// DEBUG1-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG227]]
+// DEBUG1-NEXT:    store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG227]]
+// DEBUG1-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG227]]
+// DEBUG1-NEXT:    store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG227]]
+// DEBUG1-NEXT:    [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG222]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG222]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG222]]
+// DEBUG1:       arraydestroy.body11:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG222]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG222]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]], !dbg [[DBG222]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG222]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG222]]
+// DEBUG1:       arraydestroy.done15:
+// DEBUG1-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG222]]
+// DEBUG1:       ehcleanup:
+// DEBUG1-NEXT:    [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG216]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG216]]
+// DEBUG1:       arraydestroy.body17:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG216]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG216]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG216]]
+// DEBUG1:       arraydestroy.done21:
+// DEBUG1-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG216]]
+// DEBUG1:       eh.resume:
+// DEBUG1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG216]]
+// DEBUG1-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG216]]
+// DEBUG1-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG216]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG228:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META231:![0-9]+]], metadata !DIExpression()), !dbg [[DBG232:![0-9]+]]
+// DEBUG1-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG232]]
+// DEBUG1:       arraydestroy.body:
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG232]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG232]]
+// DEBUG1-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG232]]
+// DEBUG1-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG232]]
+// DEBUG1-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG232]]
+// DEBUG1:       arraydestroy.done1:
+// DEBUG1-NEXT:    ret void, !dbg [[DBG232]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@main
+// DEBUG1-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]])
+// DEBUG1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG235:![0-9]+]]
+// DEBUG1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0, !dbg [[DBG235]]
+// DEBUG1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG235]], !prof [[PROF236:![0-9]+]]
+// DEBUG1:       init.check:
+// DEBUG1-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG235]]
+// DEBUG1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG235]]
+// DEBUG1-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG235]]
+// DEBUG1:       init:
+// DEBUG1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]]), !dbg [[DBG235]]
+// DEBUG1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB7]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* (i8*)* @.__kmpc_global_ctor_..6, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..7), !dbg [[DBG235]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB9]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG237:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG237]]
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG238:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG238]]
+// DEBUG1-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP6]])
+// DEBUG1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG239:![0-9]+]]
+// DEBUG1:       invoke.cont:
+// DEBUG1-NEXT:    [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG235]]
+// DEBUG1-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG235]]
+// DEBUG1-NEXT:    br label [[INIT_END]], !dbg [[DBG235]]
+// DEBUG1:       init.end:
+// DEBUG1-NEXT:    [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.), !dbg [[DBG240:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.S3*, !dbg [[DBG240]]
+// DEBUG1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP9]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG241]]
+// DEBUG1-NEXT:    store i32 [[TMP10]], i32* [[RES]], align 4, !dbg [[DBG242:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i64 24, i8*** @_ZZ4mainE2sm.cache.), !dbg [[DBG243:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.Smain*, !dbg [[DBG243]]
+// DEBUG1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP12]], i32 0, i32 0, !dbg [[DBG244:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG244]]
+// DEBUG1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG245:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG245]]
+// DEBUG1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG245]]
+// DEBUG1-NEXT:    [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG246:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.S1*, !dbg [[DBG246]]
+// DEBUG1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP16]], i32 0, i32 0, !dbg [[DBG247:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG247]]
+// DEBUG1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG248:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG248]]
+// DEBUG1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG248]]
+// DEBUG1-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG249:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG250:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG250]]
+// DEBUG1-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG250]]
+// DEBUG1-NEXT:    [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB17:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.), !dbg [[DBG251:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S5*, !dbg [[DBG251]]
+// DEBUG1-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP22]], i32 0, i32 0, !dbg [[DBG252:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP23:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG252]]
+// DEBUG1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG253:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP23]], !dbg [[DBG253]]
+// DEBUG1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG253]]
+// DEBUG1-NEXT:    [[TMP25:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.), !dbg [[DBG254:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP26:%.*]] = bitcast i8* [[TMP25]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG254]]
+// DEBUG1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP26]], i64 0, i64 1, !dbg [[DBG254]]
+// DEBUG1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG254]]
+// DEBUG1-NEXT:    [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0, !dbg [[DBG255:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[A9]], align 4, !dbg [[DBG255]]
+// DEBUG1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG256:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP27]], !dbg [[DBG256]]
+// DEBUG1-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG256]]
+// DEBUG1-NEXT:    [[TMP29:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.), !dbg [[DBG257:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i32*, !dbg [[DBG257]]
+// DEBUG1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !dbg [[DBG257]]
+// DEBUG1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG258:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP31]], !dbg [[DBG258]]
+// DEBUG1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG258]]
+// DEBUG1-NEXT:    [[TMP33:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.), !dbg [[DBG259:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to float*, !dbg [[DBG259]]
+// DEBUG1-NEXT:    [[TMP35:%.*]] = load float, float* [[TMP34]], align 4, !dbg [[DBG259]]
+// DEBUG1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP35]] to i32, !dbg [[DBG259]]
+// DEBUG1-NEXT:    [[TMP36:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG260:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[CONV]], !dbg [[DBG260]]
+// DEBUG1-NEXT:    store i32 [[ADD12]], i32* [[RES]], align 4, !dbg [[DBG260]]
+// DEBUG1-NEXT:    [[TMP37:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.), !dbg [[DBG261:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.S4*, !dbg [[DBG261]]
+// DEBUG1-NEXT:    [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP38]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP39:%.*]] = load i32, i32* [[A13]], align 4, !dbg [[DBG262]]
+// DEBUG1-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG263:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP40]], [[TMP39]], !dbg [[DBG263]]
+// DEBUG1-NEXT:    store i32 [[ADD14]], i32* [[RES]], align 4, !dbg [[DBG263]]
+// DEBUG1-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG264:![0-9]+]]
+// DEBUG1-NEXT:    ret i32 [[TMP41]], !dbg [[DBG265:![0-9]+]]
+// DEBUG1:       lpad:
+// DEBUG1-NEXT:    [[TMP42:%.*]] = landingpad { i8*, i32 }
+// DEBUG1-NEXT:    cleanup, !dbg [[DBG266:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP43:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 0, !dbg [[DBG266]]
+// DEBUG1-NEXT:    store i8* [[TMP43]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG266]]
+// DEBUG1-NEXT:    [[TMP44:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 1, !dbg [[DBG266]]
+// DEBUG1-NEXT:    store i32 [[TMP44]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG266]]
+// DEBUG1-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG235]]
+// DEBUG1-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG235]]
+// DEBUG1:       eh.resume:
+// DEBUG1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG235]]
+// DEBUG1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG235]]
+// DEBUG1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG235]]
+// DEBUG1-NEXT:    [[LPAD_VAL15:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG235]]
+// DEBUG1-NEXT:    resume { i8*, i32 } [[LPAD_VAL15]], !dbg [[DBG235]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..6
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG267:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG270:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.Smain*, !dbg [[DBG270]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB5]], i32 [[TMP1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG271:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG271]]
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG272]]
+// DEBUG1-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) [[TMP3]], i32 [[TMP6]]), !dbg [[DBG273:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG270]]
+// DEBUG1-NEXT:    ret i8* [[TMP7]], !dbg [[DBG270]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// DEBUG1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG274:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG279:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG280:![0-9]+]]
+// DEBUG1-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG280]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG281:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..7
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG282:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG284]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.Smain*, !dbg [[DBG284]]
+// DEBUG1-NEXT:    call void @_ZZ4mainEN5SmainD1Ev(%struct.Smain* nonnull dereferenceable(24) [[TMP2]]) #[[ATTR4]], !dbg [[DBG284]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG285:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// DEBUG1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG286:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META287:![0-9]+]], metadata !DIExpression()), !dbg [[DBG288:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR4]], !dbg [[DBG289:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG290:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// DEBUG1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG291:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META292:![0-9]+]], metadata !DIExpression()), !dbg [[DBG293:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG295:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG296:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG297:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG296]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG298:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// DEBUG1-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG299:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG1-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG302:![0-9]+]]
+// DEBUG1-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG304:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG305:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_Z6foobarv
+// DEBUG1-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG306:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META307:![0-9]+]], metadata !DIExpression()), !dbg [[DBG308:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB27]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.), !dbg [[DBG309:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S3*, !dbg [[DBG309]]
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP2]], i32 0, i32 0, !dbg [[DBG310:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG310]]
+// DEBUG1-NEXT:    store i32 [[TMP3]], i32* [[RES]], align 4, !dbg [[DBG311:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG312:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG312]]
+// DEBUG1-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG313:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG313]]
+// DEBUG1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG314:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG314]]
+// DEBUG1-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG314]]
+// DEBUG1-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG315:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG316:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG316]]
+// DEBUG1-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG316]]
+// DEBUG1-NEXT:    [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.), !dbg [[DBG317:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S5*, !dbg [[DBG317]]
+// DEBUG1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP11]], i32 0, i32 0, !dbg [[DBG318:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG318]]
+// DEBUG1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG319:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]], !dbg [[DBG319]]
+// DEBUG1-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG319]]
+// DEBUG1-NEXT:    [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB33:[0-9]+]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.), !dbg [[DBG320:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG320]]
+// DEBUG1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP15]], i64 0, i64 1, !dbg [[DBG320]]
+// DEBUG1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG320]]
+// DEBUG1-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG321:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG321]]
+// DEBUG1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG322:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP16]], !dbg [[DBG322]]
+// DEBUG1-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG322]]
+// DEBUG1-NEXT:    [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB35:[0-9]+]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.), !dbg [[DBG323:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i32*, !dbg [[DBG323]]
+// DEBUG1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG323]]
+// DEBUG1-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG324:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG324]]
+// DEBUG1-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG324]]
+// DEBUG1-NEXT:    [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB37:[0-9]+]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.), !dbg [[DBG325:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP23:%.*]] = bitcast i8* [[TMP22]] to float*, !dbg [[DBG325]]
+// DEBUG1-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP23]], align 4, !dbg [[DBG325]]
+// DEBUG1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP24]] to i32, !dbg [[DBG325]]
+// DEBUG1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG326:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[CONV]], !dbg [[DBG326]]
+// DEBUG1-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG326]]
+// DEBUG1-NEXT:    [[TMP26:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB39:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.), !dbg [[DBG327:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP27:%.*]] = bitcast i8* [[TMP26]] to %struct.S4*, !dbg [[DBG327]]
+// DEBUG1-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP27]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG328]]
+// DEBUG1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG329:![0-9]+]]
+// DEBUG1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP28]], !dbg [[DBG329]]
+// DEBUG1-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG329]]
+// DEBUG1-NEXT:    [[TMP30:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG330:![0-9]+]]
+// DEBUG1-NEXT:    ret i32 [[TMP30]], !dbg [[DBG331:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
+// DEBUG1-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG332:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG333:![0-9]+]]
+// DEBUG1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG333]]
+// DEBUG1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG333]]
+// DEBUG1:       init.check:
+// DEBUG1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB41:[0-9]+]]), !dbg [[DBG333]]
+// DEBUG1-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB41]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* (i8*)* @.__kmpc_global_ctor_..9, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..10), !dbg [[DBG333]]
+// DEBUG1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG334:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG333]]
+// DEBUG1-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG333]]
+// DEBUG1-NEXT:    br label [[INIT_END]], !dbg [[DBG333]]
+// DEBUG1:       init.end:
+// DEBUG1-NEXT:    ret void, !dbg [[DBG336:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..9
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG337:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META338:![0-9]+]], metadata !DIExpression()), !dbg [[DBG339:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG340:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*, !dbg [[DBG340]]
+// DEBUG1-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) [[TMP2]], i32 23), !dbg [[DBG341:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG340]]
+// DEBUG1-NEXT:    ret i8* [[TMP3]], !dbg [[DBG340]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// DEBUG1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG342:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META346:![0-9]+]], metadata !DIExpression()), !dbg [[DBG347:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG348:![0-9]+]]
+// DEBUG1-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG348]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG349:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..10
+// DEBUG1-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG350:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META351:![0-9]+]], metadata !DIExpression()), !dbg [[DBG352:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG352]]
+// DEBUG1-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*, !dbg [[DBG352]]
+// DEBUG1-NEXT:    call void @_ZN2S4D1Ev(%struct.S4* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR4]], !dbg [[DBG352]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG353:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// DEBUG1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG354:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META355:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR4]], !dbg [[DBG357:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG358:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// DEBUG1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG359:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META360:![0-9]+]], metadata !DIExpression()), !dbg [[DBG361:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META362:![0-9]+]], metadata !DIExpression()), !dbg [[DBG363:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG364:![0-9]+]]
+// DEBUG1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG365:![0-9]+]]
+// DEBUG1-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG364]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG366:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// DEBUG1-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG367:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG1-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG369:![0-9]+]]
+// DEBUG1-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG370:![0-9]+]]
+// DEBUG1-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG372:![0-9]+]]
+// DEBUG1-NEXT:    ret void, !dbg [[DBG373:![0-9]+]]
+//
+//
+// DEBUG1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG374:![0-9]+]] {
+// DEBUG1-NEXT:  entry:
+// DEBUG1-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG375:![0-9]+]]
+// DEBUG1-NEXT:    call void @.__omp_threadprivate_init_.(), !dbg [[DBG375]]
+// DEBUG1-NEXT:    call void @__cxx_global_var_init.4(), !dbg [[DBG375]]
+// DEBUG1-NEXT:    call void @__cxx_global_var_init.5(), !dbg [[DBG375]]
+// DEBUG1-NEXT:    call void @.__omp_threadprivate_init_..3(), !dbg [[DBG375]]
+// DEBUG1-NEXT:    ret void
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_var_init
+// DEBUG2-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG115:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG118:![0-9]+]]
+// DEBUG2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG118]]
+// DEBUG2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) @_ZL3gs1, i32 5), !dbg [[DBG119:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR4:[0-9]+]], !dbg [[DBG118]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG121:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_.
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG122:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG127:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG127]]
+// DEBUG2-NEXT:    call void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[TMP2]], i32 5), !dbg [[DBG128:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG127]]
+// DEBUG2-NEXT:    ret i8* [[TMP3]], !dbg [[DBG127]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei
+// DEBUG2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG129:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG135:![0-9]+]]
+// DEBUG2-NEXT:    call void @_ZN2S1C2Ei(%struct.S1* nonnull dereferenceable(4) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG135]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG136:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_.
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG137:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG139]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG139]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[TMP2]]) #[[ATTR4]], !dbg [[DBG139]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG140:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev
+// DEBUG2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG141:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @_ZN2S1D2Ev(%struct.S1* nonnull dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG144:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG145:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
+// DEBUG2-SAME: () #[[ATTR0]] !dbg [[DBG146:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    call void @_ZN2S2C1Ei(%struct.S2* nonnull dereferenceable(16) @_ZL3gs2, i32 27), !dbg [[DBG147:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG149:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG150:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei
+// DEBUG2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG151:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG157:![0-9]+]]
+// DEBUG2-NEXT:    call void @_ZN2S2C2Ei(%struct.S2* nonnull dereferenceable(16) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG157]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG158:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev
+// DEBUG2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG159:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META160:![0-9]+]], metadata !DIExpression()), !dbg [[DBG161:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @_ZN2S2D2Ev(%struct.S2* nonnull dereferenceable(16) [[THIS1]]) #[[ATTR4]], !dbg [[DBG162:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG163:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
+// DEBUG2-SAME: () #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG164:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG165:![0-9]+]]
+// DEBUG2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i8* (i8*)* @.__kmpc_global_ctor_..3, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..4), !dbg [[DBG165]]
+// DEBUG2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG166:![0-9]+]]
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG168:![0-9]+]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 1)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG169:![0-9]+]]
+// DEBUG2:       invoke.cont:
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG168]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 2)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG170:![0-9]+]]
+// DEBUG2:       invoke.cont2:
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG168]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 3)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG171:![0-9]+]]
+// DEBUG2:       invoke.cont3:
+// DEBUG2-NEXT:    store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG166]]
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG172:![0-9]+]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 4)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG173:![0-9]+]]
+// DEBUG2:       invoke.cont7:
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG172]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 5)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG174:![0-9]+]]
+// DEBUG2:       invoke.cont8:
+// DEBUG2-NEXT:    store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG172]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 6)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG175:![0-9]+]]
+// DEBUG2:       invoke.cont9:
+// DEBUG2-NEXT:    [[TMP1:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG165]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG165]]
+// DEBUG2:       lpad:
+// DEBUG2-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// DEBUG2-NEXT:    cleanup, !dbg [[DBG176:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0, !dbg [[DBG176]]
+// DEBUG2-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG176]]
+// DEBUG2-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1, !dbg [[DBG176]]
+// DEBUG2-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG176]]
+// DEBUG2-NEXT:    [[TMP5:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG168]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP5]], !dbg [[DBG168]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG168]]
+// DEBUG2:       arraydestroy.body:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP5]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG168]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG168]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG168]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG168]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG168]]
+// DEBUG2:       arraydestroy.done4:
+// DEBUG2-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG168]]
+// DEBUG2:       lpad6:
+// DEBUG2-NEXT:    [[TMP6:%.*]] = landingpad { i8*, i32 }
+// DEBUG2-NEXT:    cleanup, !dbg [[DBG176]]
+// DEBUG2-NEXT:    [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP6]], 0, !dbg [[DBG176]]
+// DEBUG2-NEXT:    store i8* [[TMP7]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG176]]
+// DEBUG2-NEXT:    [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP6]], 1, !dbg [[DBG176]]
+// DEBUG2-NEXT:    store i32 [[TMP8]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG176]]
+// DEBUG2-NEXT:    [[TMP9:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG172]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP9]], !dbg [[DBG172]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG172]]
+// DEBUG2:       arraydestroy.body11:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP9]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG172]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG172]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]], !dbg [[DBG172]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG172]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG172]]
+// DEBUG2:       arraydestroy.done15:
+// DEBUG2-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG172]]
+// DEBUG2:       ehcleanup:
+// DEBUG2-NEXT:    [[TMP10:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP10]], i64 0, i64 0, !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG166]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG166]]
+// DEBUG2:       arraydestroy.body17:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG166]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG166]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG166]]
+// DEBUG2:       arraydestroy.done21:
+// DEBUG2-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG166]]
+// DEBUG2:       eh.resume:
+// DEBUG2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG166]]
+// DEBUG2-NEXT:    [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG166]]
+// DEBUG2-NEXT:    resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG166]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..3
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG177:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT2:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[ARRAYINIT_ENDOFINIT9:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG179:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG180:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG180]]
+// DEBUG2-NEXT:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG181:![0-9]+]]
+// DEBUG2-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[ARRAYINIT_BEGIN1:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG182:![0-9]+]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN1]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG182]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN1]], i32 1)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG183:![0-9]+]]
+// DEBUG2:       invoke.cont:
+// DEBUG2-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYINIT_BEGIN1]], i64 1, !dbg [[DBG182]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG182]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG184:![0-9]+]]
+// DEBUG2:       invoke.cont3:
+// DEBUG2-NEXT:    [[ARRAYINIT_ELEMENT4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT]], i64 1, !dbg [[DBG182]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT4]], %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG182]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT4]], i32 3)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]], !dbg [[DBG185:![0-9]+]]
+// DEBUG2:       invoke.cont5:
+// DEBUG2-NEXT:    [[ARRAYINIT_ELEMENT7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 1, !dbg [[DBG181]]
+// DEBUG2-NEXT:    store [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[ARRAYINIT_BEGIN8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_ELEMENT7]], i64 0, i64 0, !dbg [[DBG186:![0-9]+]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_BEGIN8]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG186]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_BEGIN8]], i32 4)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT11:%.*]] unwind label [[LPAD10:%.*]], !dbg [[DBG187:![0-9]+]]
+// DEBUG2:       invoke.cont11:
+// DEBUG2-NEXT:    [[ARRAYINIT_ELEMENT12:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_BEGIN8]], i64 1, !dbg [[DBG186]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT12]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG186]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT12]], i32 5)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT13:%.*]] unwind label [[LPAD10]], !dbg [[DBG188:![0-9]+]]
+// DEBUG2:       invoke.cont13:
+// DEBUG2-NEXT:    [[ARRAYINIT_ELEMENT14:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYINIT_ELEMENT12]], i64 1, !dbg [[DBG186]]
+// DEBUG2-NEXT:    store %struct.S1* [[ARRAYINIT_ELEMENT14]], %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG186]]
+// DEBUG2-NEXT:    invoke void @_ZN2S1C1Ei(%struct.S1* nonnull dereferenceable(4) [[ARRAYINIT_ELEMENT14]], i32 6)
+// DEBUG2-NEXT:    to label [[INVOKE_CONT15:%.*]] unwind label [[LPAD10]], !dbg [[DBG189:![0-9]+]]
+// DEBUG2:       invoke.cont15:
+// DEBUG2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG180]]
+// DEBUG2-NEXT:    ret i8* [[TMP3]], !dbg [[DBG180]]
+// DEBUG2:       lpad:
+// DEBUG2-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
+// DEBUG2-NEXT:    cleanup, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG179]]
+// DEBUG2-NEXT:    store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG179]]
+// DEBUG2-NEXT:    store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP7:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT2]], align 8, !dbg [[DBG182]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN1]], [[TMP7]], !dbg [[DBG182]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG182]]
+// DEBUG2:       arraydestroy.body:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP7]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG182]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG182]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG182]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAYINIT_BEGIN1]], !dbg [[DBG182]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG182]]
+// DEBUG2:       arraydestroy.done6:
+// DEBUG2-NEXT:    br label [[EHCLEANUP:%.*]], !dbg [[DBG182]]
+// DEBUG2:       lpad10:
+// DEBUG2-NEXT:    [[TMP8:%.*]] = landingpad { i8*, i32 }
+// DEBUG2-NEXT:    cleanup, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0, !dbg [[DBG179]]
+// DEBUG2-NEXT:    store i8* [[TMP9]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 1, !dbg [[DBG179]]
+// DEBUG2-NEXT:    store i32 [[TMP10]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG179]]
+// DEBUG2-NEXT:    [[TMP11:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT9]], align 8, !dbg [[DBG186]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* [[ARRAYINIT_BEGIN8]], [[TMP11]], !dbg [[DBG186]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG186]]
+// DEBUG2:       arraydestroy.body17:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[TMP11]], [[LPAD10]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG186]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG186]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG186]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], [[ARRAYINIT_BEGIN8]], !dbg [[DBG186]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG186]]
+// DEBUG2:       arraydestroy.done21:
+// DEBUG2-NEXT:    br label [[EHCLEANUP]], !dbg [[DBG186]]
+// DEBUG2:       ehcleanup:
+// DEBUG2-NEXT:    [[TMP12:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[PAD_ARRAYBEGIN:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYINIT_BEGIN]], i64 0, i64 0, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP12]], i64 0, i64 0, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ISEMPTY22:%.*]] = icmp eq %struct.S1* [[PAD_ARRAYBEGIN]], [[PAD_ARRAYEND]], !dbg [[DBG181]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_ISEMPTY22]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY23:%.*]], !dbg [[DBG181]]
+// DEBUG2:       arraydestroy.body23:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST24:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT25:%.*]], [[ARRAYDESTROY_BODY23]] ], !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT25]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST24]], i64 -1, !dbg [[DBG181]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT25]]) #[[ATTR4]], !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE26:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT25]], [[PAD_ARRAYBEGIN]], !dbg [[DBG181]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE26]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY23]], !dbg [[DBG181]]
+// DEBUG2:       arraydestroy.done27:
+// DEBUG2-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG181]]
+// DEBUG2:       eh.resume:
+// DEBUG2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG181]]
+// DEBUG2-NEXT:    [[LPAD_VAL28:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG181]]
+// DEBUG2-NEXT:    resume { i8*, i32 } [[LPAD_VAL28]], !dbg [[DBG181]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..4
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG190:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META191:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG192]]
+// DEBUG2-NEXT:    [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S1*, !dbg [[DBG192]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAY_BEGIN]], i64 6, !dbg [[DBG192]]
+// DEBUG2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG192]]
+// DEBUG2:       arraydestroy.body:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG192]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG192]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG192]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG192]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG192]]
+// DEBUG2:       arraydestroy.done1:
+// DEBUG2-NEXT:    ret void, !dbg [[DBG193:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG194:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG198:![0-9]+]]
+// DEBUG2-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG198]]
+// DEBUG2:       arraydestroy.body:
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([[STRUCT_S1:%.*]], %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG198]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG198]]
+// DEBUG2-NEXT:    call void @_ZN2S1D1Ev(%struct.S1* nonnull dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG198]]
+// DEBUG2-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG198]]
+// DEBUG2-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG198]]
+// DEBUG2:       arraydestroy.done1:
+// DEBUG2-NEXT:    ret void, !dbg [[DBG198]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@main
+// DEBUG2-SAME: () #[[ATTR5:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG53:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[EXN_SLOT:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]])
+// DEBUG2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG200:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ4mainE2sm to i8*) acquire, align 8, !dbg [[DBG201:![0-9]+]]
+// DEBUG2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0, !dbg [[DBG201]]
+// DEBUG2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG201]], !prof [[PROF202:![0-9]+]]
+// DEBUG2:       init.check:
+// DEBUG2-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG201]]
+// DEBUG2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG201]]
+// DEBUG2-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG201]]
+// DEBUG2:       init:
+// DEBUG2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]]), !dbg [[DBG201]]
+// DEBUG2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB7]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* (i8*)* @.__kmpc_global_ctor_..5, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..6), !dbg [[DBG201]]
+// DEBUG2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB9]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG203:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG203]]
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG204:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG204]]
+// DEBUG2-NEXT:    invoke void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) @_ZZ4mainE2sm, i32 [[TMP6]])
+// DEBUG2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG205:![0-9]+]]
+// DEBUG2:       invoke.cont:
+// DEBUG2-NEXT:    [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Smain*)* @_ZZ4mainEN5SmainD1Ev to void (i8*)*), i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG201]]
+// DEBUG2-NEXT:    call void @__cxa_guard_release(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG201]]
+// DEBUG2-NEXT:    br label [[INIT_END]], !dbg [[DBG201]]
+// DEBUG2:       init.end:
+// DEBUG2-NEXT:    [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.), !dbg [[DBG206:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.S3*, !dbg [[DBG206]]
+// DEBUG2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP9]], i32 0, i32 0, !dbg [[DBG207:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG207]]
+// DEBUG2-NEXT:    store i32 [[TMP10]], i32* [[RES]], align 4, !dbg [[DBG208:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.Smain* @_ZZ4mainE2sm to i8*), i64 24, i8*** @_ZZ4mainE2sm.cache.), !dbg [[DBG209:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.Smain*, !dbg [[DBG209]]
+// DEBUG2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP12]], i32 0, i32 0, !dbg [[DBG210:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG210]]
+// DEBUG2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG211:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG211]]
+// DEBUG2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG211]]
+// DEBUG2-NEXT:    [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG212:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.S1*, !dbg [[DBG212]]
+// DEBUG2-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP16]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG213]]
+// DEBUG2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG214:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG214]]
+// DEBUG2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG214]]
+// DEBUG2-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG215:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG216]]
+// DEBUG2-NEXT:    store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG216]]
+// DEBUG2-NEXT:    [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB17:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.), !dbg [[DBG217:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S5*, !dbg [[DBG217]]
+// DEBUG2-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP22]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG218]]
+// DEBUG2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG219:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP23]], !dbg [[DBG219]]
+// DEBUG2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG219]]
+// DEBUG2-NEXT:    [[TMP25:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.), !dbg [[DBG220:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP26:%.*]] = bitcast i8* [[TMP25]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG220]]
+// DEBUG2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP26]], i64 0, i64 1, !dbg [[DBG220]]
+// DEBUG2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG220]]
+// DEBUG2-NEXT:    [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0, !dbg [[DBG221:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[A9]], align 4, !dbg [[DBG221]]
+// DEBUG2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG222:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP27]], !dbg [[DBG222]]
+// DEBUG2-NEXT:    store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG222]]
+// DEBUG2-NEXT:    [[TMP29:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.), !dbg [[DBG223:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i32*, !dbg [[DBG223]]
+// DEBUG2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !dbg [[DBG223]]
+// DEBUG2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP31]], !dbg [[DBG224]]
+// DEBUG2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG224]]
+// DEBUG2-NEXT:    [[TMP33:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.), !dbg [[DBG225:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to float*, !dbg [[DBG225]]
+// DEBUG2-NEXT:    [[TMP35:%.*]] = load float, float* [[TMP34]], align 4, !dbg [[DBG225]]
+// DEBUG2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP35]] to i32, !dbg [[DBG225]]
+// DEBUG2-NEXT:    [[TMP36:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[CONV]], !dbg [[DBG226]]
+// DEBUG2-NEXT:    store i32 [[ADD12]], i32* [[RES]], align 4, !dbg [[DBG226]]
+// DEBUG2-NEXT:    [[TMP37:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.), !dbg [[DBG227:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.S4*, !dbg [[DBG227]]
+// DEBUG2-NEXT:    [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP38]], i32 0, i32 0, !dbg [[DBG228:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP39:%.*]] = load i32, i32* [[A13]], align 4, !dbg [[DBG228]]
+// DEBUG2-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG229:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP40]], [[TMP39]], !dbg [[DBG229]]
+// DEBUG2-NEXT:    store i32 [[ADD14]], i32* [[RES]], align 4, !dbg [[DBG229]]
+// DEBUG2-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]]
+// DEBUG2-NEXT:    ret i32 [[TMP41]], !dbg [[DBG231:![0-9]+]]
+// DEBUG2:       lpad:
+// DEBUG2-NEXT:    [[TMP42:%.*]] = landingpad { i8*, i32 }
+// DEBUG2-NEXT:    cleanup, !dbg [[DBG232:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP43:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 0, !dbg [[DBG232]]
+// DEBUG2-NEXT:    store i8* [[TMP43]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG232]]
+// DEBUG2-NEXT:    [[TMP44:%.*]] = extractvalue { i8*, i32 } [[TMP42]], 1, !dbg [[DBG232]]
+// DEBUG2-NEXT:    store i32 [[TMP44]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG232]]
+// DEBUG2-NEXT:    call void @__cxa_guard_abort(i64* @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG201]]
+// DEBUG2-NEXT:    br label [[EH_RESUME:%.*]], !dbg [[DBG201]]
+// DEBUG2:       eh.resume:
+// DEBUG2-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG201]]
+// DEBUG2-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG201]]
+// DEBUG2-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG201]]
+// DEBUG2-NEXT:    [[LPAD_VAL15:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG201]]
+// DEBUG2-NEXT:    resume { i8*, i32 } [[LPAD_VAL15]], !dbg [[DBG201]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..5
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG233:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG236:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.Smain*, !dbg [[DBG236]]
+// DEBUG2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB5]], i32 [[TMP1]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG237:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG237]]
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG238:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG238]]
+// DEBUG2-NEXT:    call void @_ZZ4mainEN5SmainC1Ei(%struct.Smain* nonnull dereferenceable(24) [[TMP3]], i32 [[TMP6]]), !dbg [[DBG239:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG236]]
+// DEBUG2-NEXT:    ret i8* [[TMP7]], !dbg [[DBG236]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei
+// DEBUG2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG240:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]]
+// DEBUG2-NEXT:    call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* nonnull dereferenceable(24) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG246]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG247:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..6
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG248:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG250]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.Smain*, !dbg [[DBG250]]
+// DEBUG2-NEXT:    call void @_ZZ4mainEN5SmainD1Ev(%struct.Smain* nonnull dereferenceable(24) [[TMP2]]) #[[ATTR4]], !dbg [[DBG250]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG251:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev
+// DEBUG2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG252:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* nonnull dereferenceable(24) [[THIS1]]) #[[ATTR4]], !dbg [[DBG255:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG256:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_Z6foobarv
+// DEBUG2-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG257:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[RES:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG259:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB27]], i32 [[TMP0]], i8* bitcast (%struct.S3* @_ZN6Static1sE to i8*), i64 8, i8*** @_ZN6Static1sE.cache.), !dbg [[DBG260:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S3*, !dbg [[DBG260]]
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP2]], i32 0, i32 0, !dbg [[DBG261:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG261]]
+// DEBUG2-NEXT:    store i32 [[TMP3]], i32* [[RES]], align 4, !dbg [[DBG262:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i64 4, i8*** @_ZL3gs1.cache.), !dbg [[DBG263:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S1*, !dbg [[DBG263]]
+// DEBUG2-NEXT:    [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP5]], i32 0, i32 0, !dbg [[DBG264:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG264]]
+// DEBUG2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG265:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG265]]
+// DEBUG2-NEXT:    store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG265]]
+// DEBUG2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG266:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG267:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG267]]
+// DEBUG2-NEXT:    store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG267]]
+// DEBUG2-NEXT:    [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S5* @gs3 to i8*), i64 12, i8*** @gs3.cache.), !dbg [[DBG268:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S5*, !dbg [[DBG268]]
+// DEBUG2-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP11]], i32 0, i32 0, !dbg [[DBG269:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG269]]
+// DEBUG2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG270:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]], !dbg [[DBG270]]
+// DEBUG2-NEXT:    store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG270]]
+// DEBUG2-NEXT:    [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB33:[0-9]+]], i32 [[TMP0]], i8* bitcast ([2 x [3 x %struct.S1]]* @arr_x to i8*), i64 24, i8*** @arr_x.cache.), !dbg [[DBG271:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to [2 x [3 x %struct.S1]]*, !dbg [[DBG271]]
+// DEBUG2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP15]], i64 0, i64 1, !dbg [[DBG271]]
+// DEBUG2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG271]]
+// DEBUG2-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG272]]
+// DEBUG2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG273:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP16]], !dbg [[DBG273]]
+// DEBUG2-NEXT:    store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG273]]
+// DEBUG2-NEXT:    [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB35:[0-9]+]], i32 [[TMP0]], i8* bitcast (i32* @_ZN2STIiE2stE to i8*), i64 4, i8*** @_ZN2STIiE2stE.cache.), !dbg [[DBG274:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i32*, !dbg [[DBG274]]
+// DEBUG2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG274]]
+// DEBUG2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG275:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG275]]
+// DEBUG2-NEXT:    store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG275]]
+// DEBUG2-NEXT:    [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB37:[0-9]+]], i32 [[TMP0]], i8* bitcast (float* @_ZN2STIfE2stE to i8*), i64 4, i8*** @_ZN2STIfE2stE.cache.), !dbg [[DBG276:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP23:%.*]] = bitcast i8* [[TMP22]] to float*, !dbg [[DBG276]]
+// DEBUG2-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP23]], align 4, !dbg [[DBG276]]
+// DEBUG2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP24]] to i32, !dbg [[DBG276]]
+// DEBUG2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG277:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[CONV]], !dbg [[DBG277]]
+// DEBUG2-NEXT:    store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG277]]
+// DEBUG2-NEXT:    [[TMP26:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB39:[0-9]+]], i32 [[TMP0]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i64 8, i8*** @_ZN2STI2S4E2stE.cache.), !dbg [[DBG278:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP27:%.*]] = bitcast i8* [[TMP26]] to %struct.S4*, !dbg [[DBG278]]
+// DEBUG2-NEXT:    [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP27]], i32 0, i32 0, !dbg [[DBG279:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG279]]
+// DEBUG2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG280:![0-9]+]]
+// DEBUG2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP28]], !dbg [[DBG280]]
+// DEBUG2-NEXT:    store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG280]]
+// DEBUG2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG281:![0-9]+]]
+// DEBUG2-NEXT:    ret i32 [[TMP30]], !dbg [[DBG282:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@__cxx_global_var_init.7
+// DEBUG2-SAME: () #[[ATTR0]] comdat($_ZN2STI2S4E2stE) !dbg [[DBG283:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG284:![0-9]+]]
+// DEBUG2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG284]]
+// DEBUG2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG284]]
+// DEBUG2:       init.check:
+// DEBUG2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB41:[0-9]+]]), !dbg [[DBG284]]
+// DEBUG2-NEXT:    call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB41]], i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* (i8*)* @.__kmpc_global_ctor_..8, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..9), !dbg [[DBG284]]
+// DEBUG2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) @_ZN2STI2S4E2stE, i32 23), !dbg [[DBG285:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR4]], !dbg [[DBG284]]
+// DEBUG2-NEXT:    store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG284]]
+// DEBUG2-NEXT:    br label [[INIT_END]], !dbg [[DBG284]]
+// DEBUG2:       init.end:
+// DEBUG2-NEXT:    ret void, !dbg [[DBG287:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..8
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG288:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG291:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*, !dbg [[DBG291]]
+// DEBUG2-NEXT:    call void @_ZN2S4C1Ei(%struct.S4* nonnull dereferenceable(8) [[TMP2]], i32 23), !dbg [[DBG292:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG291]]
+// DEBUG2-NEXT:    ret i8* [[TMP3]], !dbg [[DBG291]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei
+// DEBUG2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG293:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG296:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG299:![0-9]+]]
+// DEBUG2-NEXT:    call void @_ZN2S4C2Ei(%struct.S4* nonnull dereferenceable(8) [[THIS1]], i32 [[TMP0]]), !dbg [[DBG299]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG300:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..9
+// DEBUG2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] !dbg [[DBG301:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// DEBUG2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG303]]
+// DEBUG2-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S4*, !dbg [[DBG303]]
+// DEBUG2-NEXT:    call void @_ZN2S4D1Ev(%struct.S4* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR4]], !dbg [[DBG303]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG304:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev
+// DEBUG2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG305:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG307:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @_ZN2S4D2Ev(%struct.S4* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR4]], !dbg [[DBG308:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG309:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei
+// DEBUG2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG310:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG314:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG315:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG316:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG315]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG317:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev
+// DEBUG2-SAME: (%struct.S1* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG318:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// DEBUG2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG321:![0-9]+]]
+// DEBUG2-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG323:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG324:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei
+// DEBUG2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG325:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META326:![0-9]+]], metadata !DIExpression()), !dbg [[DBG327:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META328:![0-9]+]], metadata !DIExpression()), !dbg [[DBG329:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG330:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG331:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG330]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG332:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev
+// DEBUG2-SAME: (%struct.S2* nonnull dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG333:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8
+// DEBUG2-NEXT:    store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG335:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG336:![0-9]+]]
+// DEBUG2-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG338:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG339:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei
+// DEBUG2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG340:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META341:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG344:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG345:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG346:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG345]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG347:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev
+// DEBUG2-SAME: (%struct.Smain* nonnull dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG348:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8
+// DEBUG2-NEXT:    store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG350:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]]
+// DEBUG2-NEXT:    store i32 0, i32* [[A]], align 8, !dbg [[DBG353:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG354:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei
+// DEBUG2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG355:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// DEBUG2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META356:![0-9]+]], metadata !DIExpression()), !dbg [[DBG357:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META358:![0-9]+]], metadata !DIExpression()), !dbg [[DBG359:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]]
+// DEBUG2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG361:![0-9]+]]
+// DEBUG2-NEXT:    store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG360]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG362:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev
+// DEBUG2-SAME: (%struct.S4* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG363:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8
+// DEBUG2-NEXT:    store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META364:![0-9]+]], metadata !DIExpression()), !dbg [[DBG365:![0-9]+]]
+// DEBUG2-NEXT:    [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8
+// DEBUG2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG366:![0-9]+]]
+// DEBUG2-NEXT:    store i32 0, i32* [[A]], align 4, !dbg [[DBG368:![0-9]+]]
+// DEBUG2-NEXT:    ret void, !dbg [[DBG369:![0-9]+]]
+//
+//
+// DEBUG2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp
+// DEBUG2-SAME: () #[[ATTR0]] !dbg [[DBG370:![0-9]+]] {
+// DEBUG2-NEXT:  entry:
+// DEBUG2-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG371:![0-9]+]]
+// DEBUG2-NEXT:    call void @__cxx_global_var_init.1(), !dbg [[DBG371]]
+// DEBUG2-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG371]]
+// DEBUG2-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp
index 5ee48c3..2976088 100644
--- a/clang/test/OpenMP/tile_codegen.cpp
+++ b/clang/test/OpenMP/tile_codegen.cpp
@@ -1078,21 +1078,15 @@
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@body
-// CHECK2-SAME: (...) #[[ATTR0:[0-9]+]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    ret void
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init
-// CHECK2-SAME: () #[[ATTR1:[0-9]+]] section ".text.startup" {
+// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @_ZN1SC1Ev(%struct.S* nonnull dereferenceable(4) @s)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
@@ -1102,7 +1096,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_ZN1SC2Ev
-// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK2-SAME: (%struct.S* nonnull dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32*, align 8
@@ -1164,8 +1158,14 @@
 // CHECK2-NEXT:    ret void
 //
 //
+// CHECK2-LABEL: define {{[^@]+}}@body
+// CHECK2-SAME: (...) #[[ATTR2:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    ret void
+//
+//
 // CHECK2-LABEL: define {{[^@]+}}@foo1
-// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]], i32 [[STEP:%.*]]) #[[ATTR0]] {
+// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]], i32 [[STEP:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -1255,7 +1255,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo2
-// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]], i32 [[STEP:%.*]]) #[[ATTR0]] {
+// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]], i32 [[STEP:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -1368,7 +1368,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo3
-// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-SAME: () #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1510,7 +1510,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo4
-// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-SAME: () #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1663,7 +1663,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo5
-// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-SAME: () #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1872,7 +1872,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo6
-// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-SAME: () #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
 // CHECK2-NEXT:    ret void
@@ -1975,14 +1975,14 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@tfoo7
-// CHECK2-SAME: () #[[ATTR0]] {
+// CHECK2-SAME: () #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @_Z4foo7IiLi3ELi5EEvT_S0_(i32 0, i32 42)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z4foo7IiLi3ELi5EEvT_S0_
-// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-SAME: (i32 [[START:%.*]], i32 [[END:%.*]]) #[[ATTR2]] comdat {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -2066,7 +2066,7 @@
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_tile_codegen.cpp
-// CHECK2-SAME: () #[[ATTR1]] section ".text.startup" {
+// CHECK2-SAME: () #[[ATTR0]] section ".text.startup" {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @__cxx_global_var_init()
 // CHECK2-NEXT:    ret void
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
index 96868dd..9ca1232 100644
--- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
+++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
@@ -30,12 +30,15 @@
     A[i] = 1.0;
   }
 }
-// OMP-LABEL: @foo(
+// OMP-LABEL: @main(
 // OMP-NEXT:  entry:
+// OMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // OMP-NEXT:    store i32 0, i32* [[I]], align 4
 // OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-// OMP-NEXT:    ret void
+// OMP-NEXT:    call void @foo()
+// OMP-NEXT:    ret i32 0
 //
 //
 // OMP-LABEL: @.omp_outlined.(
@@ -85,7 +88,7 @@
 // OMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I]], align 4
 // OMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
 // OMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
-// OMP-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX]], align 8
+// OMP-NEXT:    store double 0.000000e+00, double* [[ARRAYIDX]], align 8
 // OMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // OMP:       omp.body.continue:
 // OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
@@ -101,15 +104,12 @@
 // OMP-NEXT:    ret void
 //
 //
-// OMP-LABEL: @main(
+// OMP-LABEL: @foo(
 // OMP-NEXT:  entry:
-// OMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
-// OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // OMP-NEXT:    store i32 0, i32* [[I]], align 4
 // OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
-// OMP-NEXT:    call void @foo()
-// OMP-NEXT:    ret i32 0
+// OMP-NEXT:    ret void
 //
 //
 // OMP-LABEL: @.omp_outlined..1(
@@ -159,7 +159,7 @@
 // OMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I]], align 4
 // OMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
 // OMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
-// OMP-NEXT:    store double 0.000000e+00, double* [[ARRAYIDX]], align 8
+// OMP-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX]], align 8
 // OMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // OMP:       omp.body.continue:
 // OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected
index 6685e56c7..da066b4 100644
--- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected
+++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected
@@ -15,7 +15,7 @@
 // OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
 // OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // OMP-NEXT:    store i32 0, i32* [[I]], align 4
-// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
 // OMP-NEXT:    call void @foo()
 // OMP-NEXT:    ret i32 0
 //
@@ -63,7 +63,7 @@
 // OMP-NEXT:  entry:
 // OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
 // OMP-NEXT:    store i32 0, i32* [[I]], align 4
-// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // OMP-NEXT:    ret void
 //
 // NOOMP-LABEL: @foo(
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 3ae87d5..4db8bcb 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -922,6 +922,13 @@
     VersionedClause<OMPC_DistSchedule>
   ];
 }
+def OMP_BeginDeclareTarget : Directive<"begin declare target"> {
+  let allowedClauses = [
+    VersionedClause<OMPC_To>,
+    VersionedClause<OMPC_Link>,
+    VersionedClause<OMPC_DeviceType>,
+  ];
+}
 def OMP_DeclareTarget : Directive<"declare target"> {
   let allowedClauses = [
     VersionedClause<OMPC_To>,
