Re-apply 0071eaaf089, "[ORC] Export __cxa_atexit ...", with fixes.

Forgot to include part of the testcase. Thank to Nico for spotting that and
reverting!
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 3be1381..9a868a6 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -29,14 +29,6 @@
 
 namespace {
 
-/// Add a reference to the __dso_handle global to the given module.
-/// Returns a reference to the __dso_handle IR decl.
-GlobalVariable *addDSOHandleDecl(Module &M) {
-  auto DSOHandleTy = StructType::create(M.getContext(), "lljit.dso_handle");
-  return new GlobalVariable(M, DSOHandleTy, true, GlobalValue::ExternalLinkage,
-                            nullptr, "__dso_handle");
-}
-
 /// Adds helper function decls and wrapper functions that call the helper with
 /// some additional prefix arguments.
 ///
@@ -143,11 +135,10 @@
     SymbolMap StdInterposes;
 
     StdInterposes[Mangle("__lljit.platform_support_instance")] =
-        JITEvaluatedSymbol(pointerToJITTargetAddress(this), JITSymbolFlags());
+        JITEvaluatedSymbol(pointerToJITTargetAddress(this),
+                           JITSymbolFlags::Exported);
     StdInterposes[Mangle("__lljit.cxa_atexit_helper")] = JITEvaluatedSymbol(
         pointerToJITTargetAddress(registerAtExitHelper), JITSymbolFlags());
-    StdInterposes[Mangle("__lljit.run_atexits_helper")] = JITEvaluatedSymbol(
-        pointerToJITTargetAddress(runAtExitsHelper), JITSymbolFlags());
 
     cantFail(
         J.getMainJITDylib().define(absoluteSymbols(std::move(StdInterposes))));
@@ -159,6 +150,14 @@
 
   /// Adds a module that defines the __dso_handle global.
   Error setupJITDylib(JITDylib &JD) {
+
+    // Add per-jitdylib standard interposes.
+    MangleAndInterner Mangle(getExecutionSession(), J.getDataLayout());
+    SymbolMap PerJDInterposes;
+    PerJDInterposes[Mangle("__lljit.run_atexits_helper")] = JITEvaluatedSymbol(
+        pointerToJITTargetAddress(runAtExitsHelper), JITSymbolFlags());
+    cantFail(JD.define(absoluteSymbols(std::move(PerJDInterposes))));
+
     auto Ctx = std::make_unique<LLVMContext>();
     auto M = std::make_unique<Module>("__standard_lib", *Ctx);
     M->setDataLayout(J.getDataLayout());
@@ -168,9 +167,23 @@
         *M, Int64Ty, true, GlobalValue::ExternalLinkage,
         ConstantInt::get(Int64Ty, reinterpret_cast<uintptr_t>(&JD)),
         "__dso_handle");
-    DSOHandle->setVisibility(GlobalValue::HiddenVisibility);
+    DSOHandle->setVisibility(GlobalValue::DefaultVisibility);
     DSOHandle->setInitializer(
         ConstantInt::get(Int64Ty, pointerToJITTargetAddress(&JD)));
+
+    auto *GenericIRPlatformSupportTy =
+        StructType::create(*Ctx, "lljit.GenericLLJITIRPlatformSupport");
+
+    auto *PlatformInstanceDecl = new GlobalVariable(
+        *M, GenericIRPlatformSupportTy, true, GlobalValue::ExternalLinkage,
+        nullptr, "__lljit.platform_support_instance");
+
+    auto *VoidTy = Type::getVoidTy(*Ctx);
+    addHelperAndWrapper(
+        *M, "__lljit_run_atexits", FunctionType::get(VoidTy, {}, false),
+        GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper",
+        {PlatformInstanceDecl, DSOHandle});
+
     return J.addIRModule(JD, ThreadSafeModule(std::move(M), std::move(Ctx)));
   }
 
@@ -316,6 +329,16 @@
       }
     });
 
+    LLVM_DEBUG({
+      dbgs() << "JITDylib deinit order is [ ";
+      for (auto *JD : DFSLinkOrder)
+        dbgs() << "\"" << JD->getName() << "\" ";
+      dbgs() << "]\n";
+      dbgs() << "Looking up deinit functions:\n";
+      for (auto &KV : LookupSymbols)
+        dbgs() << "  \"" << KV.first->getName() << "\": " << KV.second << "\n";
+    });
+
     auto LookupResult = Platform::lookupInitSymbols(ES, LookupSymbols);
 
     if (!LookupResult)
@@ -387,11 +410,19 @@
 
   static void registerAtExitHelper(void *Self, void (*F)(void *), void *Ctx,
                                    void *DSOHandle) {
+    LLVM_DEBUG({
+      dbgs() << "Registering atexit function " << (void *)F << " for JD "
+             << (*static_cast<JITDylib **>(DSOHandle))->getName() << "\n";
+    });
     static_cast<GenericLLVMIRPlatformSupport *>(Self)->AtExitMgr.registerAtExit(
         F, Ctx, DSOHandle);
   }
 
   static void runAtExitsHelper(void *Self, void *DSOHandle) {
+    LLVM_DEBUG({
+      dbgs() << "Running atexit functions for JD "
+             << (*static_cast<JITDylib **>(DSOHandle))->getName() << "\n";
+    });
     static_cast<GenericLLVMIRPlatformSupport *>(Self)->AtExitMgr.runAtExits(
         DSOHandle);
   }
@@ -410,8 +441,6 @@
         *M, GenericIRPlatformSupportTy, true, GlobalValue::ExternalLinkage,
         nullptr, "__lljit.platform_support_instance");
 
-    auto *DSOHandleDecl = addDSOHandleDecl(*M);
-
     auto *Int8Ty = Type::getInt8Ty(*Ctx);
     auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT);
     auto *VoidTy = Type::getVoidTy(*Ctx);
@@ -423,14 +452,9 @@
         *M, "__cxa_atexit",
         FunctionType::get(IntTy, {AtExitCallbackPtrTy, BytePtrTy, BytePtrTy},
                           false),
-        GlobalValue::HiddenVisibility, "__lljit.cxa_atexit_helper",
+        GlobalValue::DefaultVisibility, "__lljit.cxa_atexit_helper",
         {PlatformInstanceDecl});
 
-    addHelperAndWrapper(
-        *M, "__lljit_run_atexits", FunctionType::get(VoidTy, {}, false),
-        GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper",
-        {PlatformInstanceDecl, DSOHandleDecl});
-
     return ThreadSafeModule(std::move(M), std::move(Ctx));
   }
 
@@ -676,7 +700,7 @@
     auto *DSOHandle =
         new GlobalVariable(M, Int64Ty, true, GlobalValue::ExternalLinkage,
                            ConstantInt::get(Int64Ty, 0), "__dso_handle");
-    DSOHandle->setVisibility(GlobalValue::HiddenVisibility);
+    DSOHandle->setVisibility(GlobalValue::DefaultVisibility);
 
     return cantFail(J.getIRCompileLayer().getCompiler()(M));
   }
diff --git a/llvm/test/ExecutionEngine/OrcLazy/Inputs/noop-main.ll b/llvm/test/ExecutionEngine/OrcLazy/Inputs/noop-main.ll
new file mode 100644
index 0000000..8438173
--- /dev/null
+++ b/llvm/test/ExecutionEngine/OrcLazy/Inputs/noop-main.ll
@@ -0,0 +1,7 @@
+; Can be used as an entry point for tests that rely purely on static
+; initializer side-effects.
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+  ret i32 0
+}
diff --git a/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll b/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll
index 00b54fb..67d392e 100644
--- a/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll
+++ b/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll
@@ -1,6 +1,12 @@
-; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout %s | FileCheck %s
+; Test that global constructors and destructors are run:
 ;
-; Test that global constructors and destructors are run.
+; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout -extra-module %s \
+; RUN:   %S/Inputs/noop-main.ll | FileCheck %s
+;
+; Test that this is true for global constructors and destructors in other
+; JITDylibs.
+; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout \
+; RUN:   -jd extra -extra-module %s -jd main %S/Inputs/noop-main.ll | FileCheck %s
 ;
 ; CHECK: Hello
 ; CHECK: [ {{.*}}main{{.*}} ]
@@ -22,11 +28,6 @@
 
 declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 
-define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
-entry:
-  ret i32 0
-}
-
 define internal void @_GLOBAL__sub_I_hello.cpp() {
 entry:
   %puts.i.i.i = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str, i64 0, i64 0))