| //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // This pass builds the coroutine frame and outlines resume and destroy parts |
| // of the coroutine into separate functions. |
| // |
| // We present a coroutine to LLVM as an ordinary function with suspension |
| // points marked up with intrinsics. We let the optimizer work on the coroutine |
| // as a single function for as long as possible. Shortly before the coroutine is |
| // eligible to be inlined into its callers, we split up the coroutine into parts |
| // corresponding to the initial, resume and destroy invocations of the coroutine, |
| // add them to the current SCC and restart the IPO pipeline to optimize the |
| // coroutine subfunctions we extracted before proceeding to the caller of the |
| // coroutine. |
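| // |
| // For illustration (a hand-written example, not actual pass output), a |
| // switch-ABI coroutine ramp function such as |
| // |
| //   define ptr @f(i32 %n) |
| // |
| // ends up accompanied by clones of roughly the form |
| // |
| //   define internal fastcc void @f.resume(ptr %frame) |
| //   define internal fastcc void @f.destroy(ptr %frame) |
| //   define internal fastcc void @f.cleanup(ptr %frame) |
| // |
| // with the resume/destroy function pointers and a suspend-point index kept |
| // in the coroutine frame. |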
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/Coroutines/CoroSplit.h" |
| #include "CoroCloner.h" |
| #include "CoroInternal.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/PriorityWorklist.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/CFG.h" |
| #include "llvm/Analysis/CallGraph.h" |
| #include "llvm/Analysis/ConstantFolding.h" |
| #include "llvm/Analysis/LazyCallGraph.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/BinaryFormat/Dwarf.h" |
| #include "llvm/IR/Argument.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/CFG.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugInfo.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/IR/Verifier.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/PrettyStackTrace.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/Coroutines/MaterializationUtils.h" |
| #include "llvm/Transforms/Scalar.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| #include "llvm/Transforms/Utils/CallGraphUpdater.h" |
| #include "llvm/Transforms/Utils/Cloning.h" |
| #include "llvm/Transforms/Utils/Local.h" |
| #include <cassert> |
| #include <cstddef> |
| #include <cstdint> |
| #include <initializer_list> |
| #include <iterator> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "coro-split" |
| |
| namespace { |
| /// Collect (a known) subset of global debug info metadata potentially used by |
| /// the function \p F. |
| /// |
| /// This metadata set can be used to avoid cloning debug info not owned by \p F |
| /// and is shared among all potential clones of \p F. |
| MetadataSetTy collectCommonDebugInfo(Function &F) { |
| TimeTraceScope FunctionScope("CollectCommonDebugInfo"); |
| |
| DebugInfoFinder DIFinder; |
| DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning( |
| F, CloneFunctionChangeType::LocalChangesOnly, DIFinder); |
| |
| return FindDebugInfoToIdentityMap(CloneFunctionChangeType::LocalChangesOnly, |
| DIFinder, SPClonedWithinModule); |
| } |
| } // end anonymous namespace |
| |
| // FIXME: |
| // Lower the intrinsic in the CoroEarly phase if the coroutine frame doesn't |
| // escape and it is known that other transformations, for example sanitizers, |
| // won't lead to incorrect code. |
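| // |
| // For illustration (a hand-written example, not actual pass output), a call |
| // such as |
| // |
| //   call void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %frame, |
| //                                             ptr @await_suspend_wrapper) |
| // |
| // is lowered here roughly to |
| // |
| //   %handle = call ptr @await_suspend_wrapper(ptr %awaiter, ptr %frame) |
| //   %resume.addr = call ptr @llvm.coro.subfn.addr(ptr %handle, i8 0) |
| //   call fastcc void %resume.addr(ptr %handle) |
| // |
| // where the trailing resume call is recorded in Shape.SymmetricTransfers and |
| // only turned into a musttail call (symmetric transfer) after the coroutine |
| // has been split. |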
| static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB, |
| coro::Shape &Shape) { |
| auto Wrapper = CB->getWrapperFunction(); |
| auto Awaiter = CB->getAwaiter(); |
| auto FramePtr = CB->getFrame(); |
| |
| Builder.SetInsertPoint(CB); |
| |
| CallBase *NewCall = nullptr; |
| // await_suspend has only 2 parameters, awaiter and handle. |
| // Copy parameter attributes from the intrinsic call, but remove the last one, |
| // because the last parameter now becomes the function that is being called. |
| AttributeList NewAttributes = |
| CB->getAttributes().removeParamAttributes(CB->getContext(), 2); |
| |
| if (auto Invoke = dyn_cast<InvokeInst>(CB)) { |
| auto WrapperInvoke = |
| Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(), |
| Invoke->getUnwindDest(), {Awaiter, FramePtr}); |
| |
| WrapperInvoke->setCallingConv(Invoke->getCallingConv()); |
| std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(), |
| WrapperInvoke->bundle_op_info_begin()); |
| WrapperInvoke->setAttributes(NewAttributes); |
| WrapperInvoke->setDebugLoc(Invoke->getDebugLoc()); |
| NewCall = WrapperInvoke; |
| } else if (auto Call = dyn_cast<CallInst>(CB)) { |
| auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr}); |
| |
| WrapperCall->setAttributes(NewAttributes); |
| WrapperCall->setDebugLoc(Call->getDebugLoc()); |
| NewCall = WrapperCall; |
| } else { |
| llvm_unreachable("Unexpected coro_await_suspend invocation method"); |
| } |
| |
| if (CB->getCalledFunction()->getIntrinsicID() == |
| Intrinsic::coro_await_suspend_handle) { |
| // Follow the lowered await_suspend call above with a lowered resume call |
| // to the returned coroutine. |
| if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { |
| // If the await_suspend call is an invoke, we continue in the next block. |
| Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); |
| } |
| |
| coro::LowererBase LB(*Wrapper->getParent()); |
| auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex, |
| &*Builder.GetInsertPoint()); |
| |
| LLVMContext &Ctx = Builder.getContext(); |
| FunctionType *ResumeTy = FunctionType::get( |
| Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false); |
| auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall}); |
| ResumeCall->setCallingConv(CallingConv::Fast); |
| |
| // We can't insert the 'ret' instruction and adjust the cc until the |
| // function has been split, so remember this for later. |
| Shape.SymmetricTransfers.push_back(ResumeCall); |
| |
| NewCall = ResumeCall; |
| } |
| |
| CB->replaceAllUsesWith(NewCall); |
| CB->eraseFromParent(); |
| } |
| |
| static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { |
| IRBuilder<> Builder(F.getContext()); |
| for (auto *AWS : Shape.CoroAwaitSuspends) |
| lowerAwaitSuspend(Builder, AWS, Shape); |
| } |
| |
| static void maybeFreeRetconStorage(IRBuilder<> &Builder, |
| const coro::Shape &Shape, Value *FramePtr, |
| CallGraph *CG) { |
| assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); |
| if (Shape.RetconLowering.IsFrameInlineInStorage) |
| return; |
| |
| Shape.emitDealloc(Builder, FramePtr, CG); |
| } |
| |
| /// Replace an llvm.coro.end.async. |
| /// Will inline the musttail call function if there is one. |
| /// \returns true if cleanup of the coro.end block is needed, false otherwise. |
| static bool replaceCoroEndAsync(AnyCoroEndInst *End) { |
| IRBuilder<> Builder(End); |
| |
| auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End); |
| if (!EndAsync) { |
| Builder.CreateRetVoid(); |
| return true /*needs cleanup of coro.end block*/; |
| } |
| |
| auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); |
| if (!MustTailCallFunc) { |
| Builder.CreateRetVoid(); |
| return true /*needs cleanup of coro.end block*/; |
| } |
| |
| // Move the must tail call from the predecessor block into the end block. |
| auto *CoroEndBlock = End->getParent(); |
| auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor(); |
| assert(MustTailCallFuncBlock && "Must have a single predecessor block"); |
| auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); |
| auto *MustTailCall = cast<CallInst>(&*std::prev(It)); |
| CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock, |
| MustTailCall->getIterator()); |
| |
| // Insert the return instruction. |
| Builder.SetInsertPoint(End); |
| Builder.CreateRetVoid(); |
| InlineFunctionInfo FnInfo; |
| |
| // Remove the rest of the block, by splitting it into an unreachable block. |
| auto *BB = End->getParent(); |
| BB->splitBasicBlock(End); |
| BB->getTerminator()->eraseFromParent(); |
| |
| auto InlineRes = InlineFunction(*MustTailCall, FnInfo); |
| assert(InlineRes.isSuccess() && "Expected inlining to succeed"); |
| (void)InlineRes; |
| |
| // We have cleaned up the coro.end block above. |
| return false; |
| } |
| |
| /// Replace a non-unwind call to llvm.coro.end. |
| static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, |
| const coro::Shape &Shape, Value *FramePtr, |
| bool InResume, CallGraph *CG) { |
| // Start inserting right before the coro.end. |
| IRBuilder<> Builder(End); |
| |
| // Create the return instruction. |
| switch (Shape.ABI) { |
| // The cloned functions in switch-lowering always return void. |
| case coro::ABI::Switch: |
| assert(!cast<CoroEndInst>(End)->hasResults() && |
| "switch coroutine should not return any values"); |
| // coro.end doesn't immediately end the coroutine in the main function |
| // in this lowering, because we need to deallocate the coroutine. |
| if (!InResume) |
| return; |
| Builder.CreateRetVoid(); |
| break; |
| |
| // In async lowering this returns. |
| case coro::ABI::Async: { |
| bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); |
| if (!CoroEndBlockNeedsCleanup) |
| return; |
| break; |
| } |
| |
| // In unique continuation lowering, the continuations always return void. |
| // But we may have implicitly allocated storage. |
| case coro::ABI::RetconOnce: { |
| maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
| auto *CoroEnd = cast<CoroEndInst>(End); |
| auto *RetTy = Shape.getResumeFunctionType()->getReturnType(); |
| |
| if (!CoroEnd->hasResults()) { |
| assert(RetTy->isVoidTy()); |
| Builder.CreateRetVoid(); |
| break; |
| } |
| |
| auto *CoroResults = CoroEnd->getResults(); |
| unsigned NumReturns = CoroResults->numReturns(); |
| |
| if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) { |
| assert(RetStructTy->getNumElements() == NumReturns && |
| "numbers of returns should match resume function singature"); |
| Value *ReturnValue = PoisonValue::get(RetStructTy); |
| unsigned Idx = 0; |
| for (Value *RetValEl : CoroResults->return_values()) |
| ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++); |
| Builder.CreateRet(ReturnValue); |
| } else if (NumReturns == 0) { |
| assert(RetTy->isVoidTy()); |
| Builder.CreateRetVoid(); |
| } else { |
| assert(NumReturns == 1); |
| Builder.CreateRet(*CoroResults->retval_begin()); |
| } |
| CoroResults->replaceAllUsesWith( |
| ConstantTokenNone::get(CoroResults->getContext())); |
| CoroResults->eraseFromParent(); |
| break; |
| } |
| |
| // In non-unique continuation lowering, we signal completion by returning |
| // a null continuation. |
| case coro::ABI::Retcon: { |
| assert(!cast<CoroEndInst>(End)->hasResults() && |
| "retcon coroutine should not return any values"); |
| maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
| auto RetTy = Shape.getResumeFunctionType()->getReturnType(); |
| auto RetStructTy = dyn_cast<StructType>(RetTy); |
| PointerType *ContinuationTy = |
| cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy); |
| |
| Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); |
| if (RetStructTy) { |
| ReturnValue = Builder.CreateInsertValue(PoisonValue::get(RetStructTy), |
| ReturnValue, 0); |
| } |
| Builder.CreateRet(ReturnValue); |
| break; |
| } |
| } |
| |
| // Remove the rest of the block, by splitting it into an unreachable block. |
| auto *BB = End->getParent(); |
| BB->splitBasicBlock(End); |
| BB->getTerminator()->eraseFromParent(); |
| } |
| |
| // Mark a coroutine as done, which implies that the coroutine is finished and |
| // will never be resumed. |
| // |
| // In the resume-switched ABI, the done state is represented by storing a null |
| // pointer in ResumeFnAddr. |
| // |
| // NOTE: We cannot omit the argument `FramePtr`. It is necessary because the |
| // pointer to the frame in the split function is not stored in `Shape`. |
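| // |
| // For illustration (a hand-written example, not actual pass output), this |
| // emits roughly |
| // |
| //   %ResumeFn.addr = getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 0 |
| //   store ptr null, ptr %ResumeFn.addr |
| // |
| // plus, when both an unwind coro.end and a final suspend point are present, a |
| // store of the final suspend index into the index field of the frame. |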
| static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape, |
| Value *FramePtr) { |
| assert( |
| Shape.ABI == coro::ABI::Switch && |
| "markCoroutineAsDone is only supported for Switch-Resumed ABI for now."); |
| auto *GepIndex = Builder.CreateStructGEP( |
| Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume, |
| "ResumeFn.addr"); |
| auto *NullPtr = ConstantPointerNull::get(cast<PointerType>( |
| Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume))); |
| Builder.CreateStore(NullPtr, GepIndex); |
| |
| // If the coroutine doesn't have an unwind coro.end, we can omit the store of |
| // the final suspend index, since we can infer that the coroutine is suspended |
| // at the final suspend point from the nullness of ResumeFnAddr. |
| // However, we can't skip it if the coroutine has an unwind coro.end: a |
| // coroutine that reaches the unwind coro.end looks suspended at the final |
| // suspend point (its ResumeFnAddr is null) even though it hasn't actually |
| // completed yet. We need the IndexVal for the final suspend point to keep the |
| // states distinguishable. |
| if (Shape.SwitchLowering.HasUnwindCoroEnd && |
| Shape.SwitchLowering.HasFinalSuspend) { |
| assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() && |
| "The final suspend should only live in the last position of " |
| "CoroSuspends."); |
| ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1); |
| auto *FinalIndex = Builder.CreateStructGEP( |
| Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); |
| |
| Builder.CreateStore(IndexVal, FinalIndex); |
| } |
| } |
| |
| /// Replace an unwind call to llvm.coro.end. |
| static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, |
| Value *FramePtr, bool InResume, |
| CallGraph *CG) { |
| IRBuilder<> Builder(End); |
| |
| switch (Shape.ABI) { |
| // In switch-lowering, this does nothing in the main function. |
| case coro::ABI::Switch: { |
| // In C++'s specification, the coroutine should be marked as done |
| // if promise.unhandled_exception() throws. The frontend will |
| // call coro.end(true) along this path. |
| // |
| // FIXME: We should refactor this once there is other language |
| // which uses Switch-Resumed style other than C++. |
| markCoroutineAsDone(Builder, Shape, FramePtr); |
| if (!InResume) |
| return; |
| break; |
| } |
| // In async lowering this does nothing. |
| case coro::ABI::Async: |
| break; |
| // In continuation-lowering, this frees the continuation storage. |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: |
| maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); |
| break; |
| } |
| |
| // If coro.end has an associated bundle, add cleanupret instruction. |
| if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { |
| auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]); |
| auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); |
| End->getParent()->splitBasicBlock(End); |
| CleanupRet->getParent()->getTerminator()->eraseFromParent(); |
| } |
| } |
| |
| static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, |
| Value *FramePtr, bool InResume, CallGraph *CG) { |
| if (End->isUnwind()) |
| replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); |
| else |
| replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); |
| |
| auto &Context = End->getContext(); |
| End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) |
| : ConstantInt::getFalse(Context)); |
| End->eraseFromParent(); |
| } |
| |
| // In the resume function, we remove the last case (when coro::Shape is built, |
| // the final suspend point (if present) is always the last element of the |
| // CoroSuspends array), since it is undefined behavior to resume a coroutine |
| // suspended at the final suspend point. |
| // In the destroy function, we can likewise remove the last case as long as it |
| // is impossible for ResumeFnAddr to be null while the coroutine is not |
| // actually suspended at the final suspend point (that situation can arise |
| // because a coroutine whose promise.unhandled_exception() exits via an |
| // exception is considered suspended at the final suspend point). |
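| // |
| // For illustration (a hand-written example, not actual pass output), in the |
| // destroy clone the removed case is replaced by a guard of roughly the form |
| // |
| //   %ResumeFn = load ptr, ptr %ResumeFn.addr |
| //   %is.done = icmp eq ptr %ResumeFn, null |
| //   br i1 %is.done, label %final.suspend.bb, label %Switch |
| // |
| // so that a coroutine whose resume pointer has already been nulled out (i.e. |
| // one that is done) still reaches the code for the final suspend point. |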
| void coro::BaseCloner::handleFinalSuspend() { |
| assert(Shape.ABI == coro::ABI::Switch && |
| Shape.SwitchLowering.HasFinalSuspend); |
| |
| if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) |
| return; |
| |
| auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); |
| auto FinalCaseIt = std::prev(Switch->case_end()); |
| BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); |
| Switch->removeCase(FinalCaseIt); |
| if (isSwitchDestroyFunction()) { |
| BasicBlock *OldSwitchBB = Switch->getParent(); |
| auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); |
| Builder.SetInsertPoint(OldSwitchBB->getTerminator()); |
| |
| if (NewF->isCoroOnlyDestroyWhenComplete()) { |
| // When the coroutine can only be destroyed when complete, we don't need |
| // to generate code for other cases. |
| Builder.CreateBr(ResumeBB); |
| } else { |
| auto *GepIndex = Builder.CreateStructGEP( |
| Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume, |
| "ResumeFn.addr"); |
| auto *Load = |
| Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex); |
| auto *Cond = Builder.CreateIsNull(Load); |
| Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); |
| } |
| OldSwitchBB->getTerminator()->eraseFromParent(); |
| } |
| } |
| |
| static FunctionType * |
| getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) { |
| auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend); |
| auto *StructTy = cast<StructType>(AsyncSuspend->getType()); |
| auto &Context = Suspend->getParent()->getParent()->getContext(); |
| auto *VoidTy = Type::getVoidTy(Context); |
| return FunctionType::get(VoidTy, StructTy->elements(), false); |
| } |
| |
| static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, |
| const Twine &Suffix, |
| Module::iterator InsertBefore, |
| AnyCoroSuspendInst *ActiveSuspend) { |
| Module *M = OrigF.getParent(); |
| auto *FnTy = (Shape.ABI != coro::ABI::Async) |
| ? Shape.getResumeFunctionType() |
| : getFunctionTypeFromAsyncSuspend(ActiveSuspend); |
| |
| Function *NewF = |
| Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, |
| OrigF.getName() + Suffix); |
| |
| M->getFunctionList().insert(InsertBefore, NewF); |
| |
| return NewF; |
| } |
| |
| /// Replace uses of the active llvm.coro.suspend.retcon/async call with the |
| /// arguments to the continuation function. |
| /// |
| /// This assumes that the builder has a meaningful insertion point. |
| void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() { |
| assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || |
| Shape.ABI == coro::ABI::Async); |
| |
| auto NewS = VMap[ActiveSuspend]; |
| if (NewS->use_empty()) |
| return; |
| |
| // Copy out all the continuation arguments after the buffer pointer into |
| // an easily-indexed data structure for convenience. |
| SmallVector<Value *, 8> Args; |
| // The async ABI includes all arguments -- including the first argument. |
| bool IsAsyncABI = Shape.ABI == coro::ABI::Async; |
| for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()), |
| E = NewF->arg_end(); |
| I != E; ++I) |
| Args.push_back(&*I); |
| |
| // If the suspend returns a single scalar value, we can just do a simple |
| // replacement. |
| if (!isa<StructType>(NewS->getType())) { |
| assert(Args.size() == 1); |
| NewS->replaceAllUsesWith(Args.front()); |
| return; |
| } |
| |
| // Try to peephole extracts of an aggregate return. |
| for (Use &U : llvm::make_early_inc_range(NewS->uses())) { |
| auto *EVI = dyn_cast<ExtractValueInst>(U.getUser()); |
| if (!EVI || EVI->getNumIndices() != 1) |
| continue; |
| |
| EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); |
| EVI->eraseFromParent(); |
| } |
| |
| // If we have no remaining uses, we're done. |
| if (NewS->use_empty()) |
| return; |
| |
| // Otherwise, we need to create an aggregate. |
| Value *Aggr = PoisonValue::get(NewS->getType()); |
| for (auto [Idx, Arg] : llvm::enumerate(Args)) |
| Aggr = Builder.CreateInsertValue(Aggr, Arg, Idx); |
| |
| NewS->replaceAllUsesWith(Aggr); |
| } |
| |
| void coro::BaseCloner::replaceCoroSuspends() { |
| Value *SuspendResult; |
| |
| switch (Shape.ABI) { |
| // In switch lowering, replace coro.suspend with the appropriate value |
| // for the type of function we're extracting. |
| // Replacing coro.suspend with (0) will result in control flow proceeding to |
| // a resume label associated with a suspend point; replacing it with (1) will |
| // result in control flow proceeding to a cleanup label associated with this |
| // suspend point. |
| case coro::ABI::Switch: |
| SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); |
| break; |
| |
| // In async lowering there are no uses of the result. |
| case coro::ABI::Async: |
| return; |
| |
| // In returned-continuation lowering, the arguments from earlier |
| // continuations are theoretically arbitrary, and they should have been |
| // spilled. |
| case coro::ABI::RetconOnce: |
| case coro::ABI::Retcon: |
| return; |
| } |
| |
| for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { |
| // The active suspend was handled earlier. |
| if (CS == ActiveSuspend) |
| continue; |
| |
| auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]); |
| MappedCS->replaceAllUsesWith(SuspendResult); |
| MappedCS->eraseFromParent(); |
| } |
| } |
| |
| void coro::BaseCloner::replaceCoroEnds() { |
| for (AnyCoroEndInst *CE : Shape.CoroEnds) { |
| // We use a null call graph because there's no call graph node for |
| // the cloned function yet. We'll just be rebuilding that later. |
| auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]); |
| replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); |
| } |
| } |
| |
| static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, |
| ValueToValueMapTy *VMap) { |
| if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty()) |
| return; |
| Value *CachedSlot = nullptr; |
| auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { |
| if (CachedSlot) |
| return CachedSlot; |
| |
| // Check if the function has a swifterror argument. |
| for (auto &Arg : F.args()) { |
| if (Arg.isSwiftError()) { |
| CachedSlot = &Arg; |
| return &Arg; |
| } |
| } |
| |
| // Create a swifterror alloca. |
| IRBuilder<> Builder(&F.getEntryBlock(), |
| F.getEntryBlock().getFirstNonPHIOrDbg()); |
| auto Alloca = Builder.CreateAlloca(ValueTy); |
| Alloca->setSwiftError(true); |
| |
| CachedSlot = Alloca; |
| return Alloca; |
| }; |
| |
| for (CallInst *Op : Shape.SwiftErrorOps) { |
| auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op; |
| IRBuilder<> Builder(MappedOp); |
| |
| // If there are no arguments, this is a 'get' operation. |
| Value *MappedResult; |
| if (Op->arg_empty()) { |
| auto ValueTy = Op->getType(); |
| auto Slot = getSwiftErrorSlot(ValueTy); |
| MappedResult = Builder.CreateLoad(ValueTy, Slot); |
| } else { |
| assert(Op->arg_size() == 1); |
| auto Value = MappedOp->getArgOperand(0); |
| auto ValueTy = Value->getType(); |
| auto Slot = getSwiftErrorSlot(ValueTy); |
| Builder.CreateStore(Value, Slot); |
| MappedResult = Slot; |
| } |
| |
| MappedOp->replaceAllUsesWith(MappedResult); |
| MappedOp->eraseFromParent(); |
| } |
| |
| // If we're updating the original function, we've invalidated SwiftErrorOps. |
| if (VMap == nullptr) { |
| Shape.SwiftErrorOps.clear(); |
| } |
| } |
| |
| /// Returns all debug variable intrinsics and debug variable records in F. |
| static std::pair<SmallVector<DbgVariableIntrinsic *, 8>, |
| SmallVector<DbgVariableRecord *>> |
| collectDbgVariableIntrinsics(Function &F) { |
| SmallVector<DbgVariableIntrinsic *, 8> Intrinsics; |
| SmallVector<DbgVariableRecord *> DbgVariableRecords; |
| for (auto &I : instructions(F)) { |
| for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) |
| DbgVariableRecords.push_back(&DVR); |
| if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) |
| Intrinsics.push_back(DVI); |
| } |
| return {Intrinsics, DbgVariableRecords}; |
| } |
| |
| void coro::BaseCloner::replaceSwiftErrorOps() { |
| ::replaceSwiftErrorOps(*NewF, Shape, &VMap); |
| } |
| |
| void coro::BaseCloner::salvageDebugInfo() { |
| auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF); |
| SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; |
| |
| // Only 64-bit ABIs have a register we can refer to with the entry value. |
| bool UseEntryValue = OrigF.getParent()->getTargetTriple().isArch64Bit(); |
| for (DbgVariableIntrinsic *DVI : Worklist) |
| coro::salvageDebugInfo(ArgToAllocaMap, *DVI, UseEntryValue); |
| for (DbgVariableRecord *DVR : DbgVariableRecords) |
| coro::salvageDebugInfo(ArgToAllocaMap, *DVR, UseEntryValue); |
| |
| // Remove all salvaged dbg.declare intrinsics that became |
| // either unreachable or stale due to the CoroSplit transformation. |
| DominatorTree DomTree(*NewF); |
| auto IsUnreachableBlock = [&](BasicBlock *BB) { |
| return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr, |
| &DomTree); |
| }; |
| auto RemoveOne = [&](auto *DVI) { |
| if (IsUnreachableBlock(DVI->getParent())) |
| DVI->eraseFromParent(); |
| else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) { |
| // Count all non-debuginfo uses in reachable blocks. |
| unsigned Uses = 0; |
| for (auto *User : DVI->getVariableLocationOp(0)->users()) |
| if (auto *I = dyn_cast<Instruction>(User)) |
| if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent())) |
| ++Uses; |
| if (!Uses) |
| DVI->eraseFromParent(); |
| } |
| }; |
| for_each(Worklist, RemoveOne); |
| for_each(DbgVariableRecords, RemoveOne); |
| } |
| |
| void coro::BaseCloner::replaceEntryBlock() { |
| // In the original function, the AllocaSpillBlock is a block immediately |
| // following the allocation of the frame object which defines GEPs for |
| // all the allocas that have been moved into the frame, and it ends by |
| // branching to the original beginning of the coroutine. Make this |
| // the entry block of the cloned function. |
| auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); |
| auto *OldEntry = &NewF->getEntryBlock(); |
| Entry->setName("entry" + Suffix); |
| Entry->moveBefore(OldEntry); |
| Entry->getTerminator()->eraseFromParent(); |
| |
| // Clear all predecessors of the new entry block. There should be |
| // exactly one predecessor, which we created when splitting out |
| // AllocaSpillBlock to begin with. |
| assert(Entry->hasOneUse()); |
| auto BranchToEntry = cast<BranchInst>(Entry->user_back()); |
| assert(BranchToEntry->isUnconditional()); |
| Builder.SetInsertPoint(BranchToEntry); |
| Builder.CreateUnreachable(); |
| BranchToEntry->eraseFromParent(); |
| |
| // Branch from the entry to the appropriate place. |
| Builder.SetInsertPoint(Entry); |
| switch (Shape.ABI) { |
| case coro::ABI::Switch: { |
| // In switch-lowering, we built a resume-entry block in the original |
| // function. Make the entry block branch to this. |
| auto *SwitchBB = |
| cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]); |
| Builder.CreateBr(SwitchBB); |
| break; |
| } |
| case coro::ABI::Async: |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: { |
| // In continuation ABIs, we want to branch to immediately after the |
| // active suspend point. Earlier phases will have put the suspend in its |
| // own basic block, so just thread our jump directly to its successor. |
| assert((Shape.ABI == coro::ABI::Async && |
| isa<CoroSuspendAsyncInst>(ActiveSuspend)) || |
| ((Shape.ABI == coro::ABI::Retcon || |
| Shape.ABI == coro::ABI::RetconOnce) && |
| isa<CoroSuspendRetconInst>(ActiveSuspend))); |
| auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]); |
| auto Branch = cast<BranchInst>(MappedCS->getNextNode()); |
| assert(Branch->isUnconditional()); |
| Builder.CreateBr(Branch->getSuccessor(0)); |
| break; |
| } |
| } |
| |
| // Any static alloca that's still being used but not reachable from the new |
| // entry needs to be moved to the new entry. |
| Function *F = OldEntry->getParent(); |
| DominatorTree DT{*F}; |
| for (Instruction &I : llvm::make_early_inc_range(instructions(F))) { |
| auto *Alloca = dyn_cast<AllocaInst>(&I); |
| if (!Alloca || I.use_empty()) |
| continue; |
| if (DT.isReachableFromEntry(I.getParent()) || |
| !isa<ConstantInt>(Alloca->getArraySize())) |
| continue; |
| I.moveBefore(*Entry, Entry->getFirstInsertionPt()); |
| } |
| } |
| |
| /// Derive the value of the new frame pointer. |
| Value *coro::BaseCloner::deriveNewFramePointer() { |
| // Builder should be inserting to the front of the new entry block. |
| |
| switch (Shape.ABI) { |
| // In switch-lowering, the argument is the frame pointer. |
| case coro::ABI::Switch: |
| return &*NewF->arg_begin(); |
| // In async-lowering, one of the arguments is an async context as determined |
| // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of |
| // the resume function from the async context projection function associated |
| // with the active suspend. The frame is located as a tail to the async |
| // context header. |
| case coro::ABI::Async: { |
| auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend); |
| auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff; |
| auto *CalleeContext = NewF->getArg(ContextIdx); |
| auto *ProjectionFunc = |
| ActiveAsyncSuspend->getAsyncContextProjectionFunction(); |
| auto DbgLoc = |
| cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc(); |
| // Calling i8* (i8*) |
| auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(), |
| ProjectionFunc, CalleeContext); |
| CallerContext->setCallingConv(ProjectionFunc->getCallingConv()); |
| CallerContext->setDebugLoc(DbgLoc); |
| // The frame is located after the async_context header. |
| auto &Context = Builder.getContext(); |
| auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32( |
| Type::getInt8Ty(Context), CallerContext, |
| Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr"); |
| // Inline the projection function. |
| InlineFunctionInfo InlineInfo; |
| auto InlineRes = InlineFunction(*CallerContext, InlineInfo); |
| assert(InlineRes.isSuccess()); |
| (void)InlineRes; |
| return FramePtrAddr; |
| } |
| // In continuation-lowering, the argument is the opaque storage. |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: { |
| Argument *NewStorage = &*NewF->arg_begin(); |
| auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext()); |
| |
| // If the storage is inline, just bitcast the storage to the frame type. |
| if (Shape.RetconLowering.IsFrameInlineInStorage) |
| return NewStorage; |
| |
| // Otherwise, load the real frame from the opaque storage. |
| return Builder.CreateLoad(FramePtrTy, NewStorage); |
| } |
| } |
| llvm_unreachable("bad ABI"); |
| } |
| |
| /// Adjust the scope line of the funclet to the first line number after the |
| /// suspend point. This avoids a jump in the line table from the function |
| /// declaration (to which prologue instructions are attributed) to the suspend |
| /// point. |
| /// Only adjust the scope line when the files are the same. |
| /// If no candidate line number is found, fall back to the line of |
| /// ActiveSuspend. |
| static void updateScopeLine(Instruction *ActiveSuspend, |
| DISubprogram &SPToUpdate) { |
| if (!ActiveSuspend) |
| return; |
| |
| // No subsequent instruction -> fallback to the location of ActiveSuspend. |
| if (!ActiveSuspend->getNextNonDebugInstruction()) { |
| if (auto DL = ActiveSuspend->getDebugLoc()) |
| if (SPToUpdate.getFile() == DL->getFile()) |
| SPToUpdate.setScopeLine(DL->getLine()); |
| return; |
| } |
| |
| BasicBlock::iterator Successor = |
| ActiveSuspend->getNextNonDebugInstruction()->getIterator(); |
| // CoroSplit splits the BB around ActiveSuspend, so the meaningful |
| // instructions are not in the same BB. |
| if (auto *Branch = dyn_cast_or_null<BranchInst>(Successor); |
| Branch && Branch->isUnconditional()) |
| Successor = Branch->getSuccessor(0)->getFirstNonPHIOrDbg(); |
| |
| // Find the first successor of ActiveSuspend with a non-zero line location. |
| // If that matches the file of ActiveSuspend, use it. |
| BasicBlock *PBB = Successor->getParent(); |
| for (; Successor != PBB->end(); Successor = std::next(Successor)) { |
| Successor = skipDebugIntrinsics(Successor); |
| auto DL = Successor->getDebugLoc(); |
| if (!DL || DL.getLine() == 0) |
| continue; |
| |
| if (SPToUpdate.getFile() == DL->getFile()) { |
| SPToUpdate.setScopeLine(DL.getLine()); |
| return; |
| } |
| |
| break; |
| } |
| |
| // If the search above failed, fallback to the location of ActiveSuspend. |
| if (auto DL = ActiveSuspend->getDebugLoc()) |
| if (SPToUpdate.getFile() == DL->getFile()) |
| SPToUpdate.setScopeLine(DL->getLine()); |
| } |
| |
| static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, |
| unsigned ParamIndex, uint64_t Size, |
| Align Alignment, bool NoAlias) { |
| AttrBuilder ParamAttrs(Context); |
| ParamAttrs.addAttribute(Attribute::NonNull); |
| ParamAttrs.addAttribute(Attribute::NoUndef); |
| |
| if (NoAlias) |
| ParamAttrs.addAttribute(Attribute::NoAlias); |
| |
| ParamAttrs.addAlignmentAttr(Alignment); |
| ParamAttrs.addDereferenceableAttr(Size); |
| Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); |
| } |
| |
| static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context, |
| unsigned ParamIndex) { |
| AttrBuilder ParamAttrs(Context); |
| ParamAttrs.addAttribute(Attribute::SwiftAsync); |
| Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); |
| } |
| |
| static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, |
| unsigned ParamIndex) { |
| AttrBuilder ParamAttrs(Context); |
| ParamAttrs.addAttribute(Attribute::SwiftSelf); |
| Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); |
| } |
| |
| /// Clone the body of the original function into a resume function of |
| /// some sort. |
| void coro::BaseCloner::create() { |
| assert(NewF); |
| |
| // Replace all args with dummy instructions. If an argument is the old frame |
| // pointer, the dummy will be replaced by the new frame pointer once it is |
| // computed below. Uses of all other arguments should have already been |
| // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine |
| // frame. |
| SmallVector<Instruction *> DummyArgs; |
| for (Argument &A : OrigF.args()) { |
| DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType()))); |
| VMap[&A] = DummyArgs.back(); |
| } |
| |
| SmallVector<ReturnInst *, 4> Returns; |
| |
| // Ignore attempts to change certain attributes of the function. |
| // TODO: maybe there should be a way to suppress this during cloning? |
| auto savedVisibility = NewF->getVisibility(); |
| auto savedUnnamedAddr = NewF->getUnnamedAddr(); |
| auto savedDLLStorageClass = NewF->getDLLStorageClass(); |
| |
| // NewF's linkage (which CloneFunctionInto does *not* change) might not |
| // be compatible with the visibility of OrigF (which it *does* change), |
| // so protect against that. |
| auto savedLinkage = NewF->getLinkage(); |
| NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); |
| |
| CloneFunctionAttributesInto(NewF, &OrigF, VMap, false); |
| CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr, |
| &CommonDebugInfo); |
| CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr, |
| nullptr, nullptr, &CommonDebugInfo); |
| |
| auto &Context = NewF->getContext(); |
| |
| if (DISubprogram *SP = NewF->getSubprogram()) { |
| assert(SP != OrigF.getSubprogram() && SP->isDistinct()); |
| updateScopeLine(ActiveSuspend, *SP); |
| |
| // Update the linkage name to reflect the modified symbol name. It |
| // is necessary to update the linkage name in Swift, since the |
| // mangling changes for resume functions. It might also be the |
| // right thing to do in C++, but due to a limitation in LLVM's |
| // AsmPrinter we can only do this if the function doesn't have an |
| // abstract specification, since the DWARF backend expects the |
| // abstract specification to contain the linkage name and asserts |
| // that they are identical. |
| if (SP->getUnit() && |
| SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) { |
| SP->replaceLinkageName(MDString::get(Context, NewF->getName())); |
| if (auto *Decl = SP->getDeclaration()) { |
| auto *NewDecl = DISubprogram::get( |
| Decl->getContext(), Decl->getScope(), Decl->getName(), |
| NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(), |
| Decl->getScopeLine(), Decl->getContainingType(), |
| Decl->getVirtualIndex(), Decl->getThisAdjustment(), |
| Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(), |
| Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(), |
| Decl->getThrownTypes(), Decl->getAnnotations(), |
| Decl->getTargetFuncName()); |
| SP->replaceDeclaration(NewDecl); |
| } |
| } |
| } |
| |
| NewF->setLinkage(savedLinkage); |
| NewF->setVisibility(savedVisibility); |
| NewF->setUnnamedAddr(savedUnnamedAddr); |
| NewF->setDLLStorageClass(savedDLLStorageClass); |
| // The function sanitizer metadata needs to match the signature of the |
| // function it is being attached to. However this does not hold for split |
| // functions here. Thus remove the metadata for split functions. |
| if (Shape.ABI == coro::ABI::Switch && |
| NewF->hasMetadata(LLVMContext::MD_func_sanitize)) |
| NewF->eraseMetadata(LLVMContext::MD_func_sanitize); |
| |
| // Replace the attributes of the new function: |
| auto OrigAttrs = NewF->getAttributes(); |
| auto NewAttrs = AttributeList(); |
| |
| switch (Shape.ABI) { |
| case coro::ABI::Switch: |
| // Bootstrap attributes by copying function attributes from the |
| // original function. This should include optimization settings and so on. |
| NewAttrs = NewAttrs.addFnAttributes( |
| Context, AttrBuilder(Context, OrigAttrs.getFnAttrs())); |
| |
| addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize, |
| Shape.FrameAlign, /*NoAlias=*/false); |
| break; |
| case coro::ABI::Async: { |
| auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend); |
| if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo, |
| Attribute::SwiftAsync)) { |
| uint32_t ArgAttributeIndices = |
| ActiveAsyncSuspend->getStorageArgumentIndex(); |
| auto ContextArgIndex = ArgAttributeIndices & 0xff; |
| addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex); |
| |
| // `swiftasync` must precede `swiftself` so 0 is not a valid index for |
| // `swiftself`. |
| auto SwiftSelfIndex = ArgAttributeIndices >> 8; |
| if (SwiftSelfIndex) |
| addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex); |
| } |
| |
| // Transfer the original function's attributes. |
| auto FnAttrs = OrigF.getAttributes().getFnAttrs(); |
| NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs)); |
| break; |
| } |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: |
| // If we have a continuation prototype, just use its attributes, |
| // full-stop. |
| NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); |
| |
| /// FIXME: Is it really good to add the NoAlias attribute? |
| addFramePointerAttrs(NewAttrs, Context, 0, |
| Shape.getRetconCoroId()->getStorageSize(), |
| Shape.getRetconCoroId()->getStorageAlignment(), |
| /*NoAlias=*/true); |
| |
| break; |
| } |
| |
| switch (Shape.ABI) { |
| // In these ABIs, the cloned functions always return 'void', and the |
| // existing return sites are meaningless. Note that for unique |
| // continuations, this includes the returns associated with suspends; |
| // this is fine because we can't suspend twice. |
| case coro::ABI::Switch: |
| case coro::ABI::RetconOnce: |
| // Remove old returns. |
| for (ReturnInst *Return : Returns) |
| changeToUnreachable(Return); |
| break; |
| |
| // With multi-suspend continuations, we'll already have eliminated the |
| // original returns and inserted returns before all the suspend points, |
| // so we want to leave any returns in place. |
| case coro::ABI::Retcon: |
| break; |
| // Async lowering will insert musttail calls at all suspend points, each |
| // followed by a return. |
| // Don't change returns to unreachable because that will trip up the verifier. |
| // These returns should be unreachable from the clone. |
| case coro::ABI::Async: |
| break; |
| } |
| |
| NewF->setAttributes(NewAttrs); |
| NewF->setCallingConv(Shape.getResumeFunctionCC()); |
| |
| // Set up the new entry block. |
| replaceEntryBlock(); |
| |
| // Turn symmetric transfers into musttail calls. |
| for (CallInst *ResumeCall : Shape.SymmetricTransfers) { |
| ResumeCall = cast<CallInst>(VMap[ResumeCall]); |
| if (TTI.supportsTailCallFor(ResumeCall)) { |
| // FIXME: Could we support symmetric transfer effectively without |
| // musttail? |
| ResumeCall->setTailCallKind(CallInst::TCK_MustTail); |
| } |
| |
| // Put a 'ret void' after the call, and split any remaining instructions to |
| // an unreachable block. |
| BasicBlock *BB = ResumeCall->getParent(); |
| BB->splitBasicBlock(ResumeCall->getNextNode()); |
| Builder.SetInsertPoint(BB->getTerminator()); |
| Builder.CreateRetVoid(); |
| BB->getTerminator()->eraseFromParent(); |
| } |
| |
| Builder.SetInsertPoint(&NewF->getEntryBlock().front()); |
| NewFramePtr = deriveNewFramePointer(); |
| |
| // Remap frame pointer. |
| Value *OldFramePtr = VMap[Shape.FramePtr]; |
| NewFramePtr->takeName(OldFramePtr); |
| OldFramePtr->replaceAllUsesWith(NewFramePtr); |
| |
| // Remap vFrame pointer. |
| auto *NewVFrame = Builder.CreateBitCast( |
| NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame"); |
| Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]); |
| if (OldVFrame != NewVFrame) |
| OldVFrame->replaceAllUsesWith(NewVFrame); |
| |
| // All uses of the arguments should have been resolved by this point, |
| // so we can safely remove the dummy values. |
| for (Instruction *DummyArg : DummyArgs) { |
| DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType())); |
| DummyArg->deleteValue(); |
| } |
| |
| switch (Shape.ABI) { |
| case coro::ABI::Switch: |
| // Rewrite final suspend handling as it is not done via the switch (this |
| // allows removing the final case from the switch, since it is undefined |
| // behavior to resume a coroutine suspended at the final suspend point). |
| if (Shape.SwitchLowering.HasFinalSuspend) |
| handleFinalSuspend(); |
| break; |
| case coro::ABI::Async: |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: |
| // Replace uses of the active suspend with the corresponding |
| // continuation-function arguments. |
| assert(ActiveSuspend != nullptr && |
| "no active suspend when lowering a continuation-style coroutine"); |
| replaceRetconOrAsyncSuspendUses(); |
| break; |
| } |
| |
| // Handle suspends. |
| replaceCoroSuspends(); |
| |
| // Handle swifterror. |
| replaceSwiftErrorOps(); |
| |
| // Remove coro.end intrinsics. |
| replaceCoroEnds(); |
| |
| // Salvage debug info that points into the coroutine frame. |
| salvageDebugInfo(); |
| } |
| |
| void coro::SwitchCloner::create() { |
| // Create a new function matching the original type |
| NewF = createCloneDeclaration(OrigF, Shape, Suffix, OrigF.getParent()->end(), |
| ActiveSuspend); |
| |
| // Clone the function |
| coro::BaseCloner::create(); |
| |
| // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, |
| // to suppress deallocation code. |
| coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), |
| /*Elide=*/FKind == coro::CloneKind::SwitchCleanup); |
| } |
| |
| static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { |
| assert(Shape.ABI == coro::ABI::Async); |
| |
| auto *FuncPtrStruct = cast<ConstantStruct>( |
| Shape.AsyncLowering.AsyncFuncPointer->getInitializer()); |
| auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); |
| auto *OrigContextSize = FuncPtrStruct->getOperand(1); |
| auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), |
| Shape.AsyncLowering.ContextSize); |
| auto *NewFuncPtrStruct = ConstantStruct::get( |
| FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); |
| |
| Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); |
| } |
| |
| static TypeSize getFrameSizeForShape(coro::Shape &Shape) { |
| // In the same function all coro.sizes should have the same result type. |
| auto *SizeIntrin = Shape.CoroSizes.back(); |
| Module *M = SizeIntrin->getModule(); |
| const DataLayout &DL = M->getDataLayout(); |
| return DL.getTypeAllocSize(Shape.FrameTy); |
| } |
| |
| static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { |
| if (Shape.ABI == coro::ABI::Async) |
| updateAsyncFuncPointerContextSize(Shape); |
| |
| for (CoroAlignInst *CA : Shape.CoroAligns) { |
| CA->replaceAllUsesWith( |
| ConstantInt::get(CA->getType(), Shape.FrameAlign.value())); |
| CA->eraseFromParent(); |
| } |
| |
| if (Shape.CoroSizes.empty()) |
| return; |
| |
| // In the same function all coro.sizes should have the same result type. |
| auto *SizeIntrin = Shape.CoroSizes.back(); |
| auto *SizeConstant = |
| ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape)); |
| |
| for (CoroSizeInst *CS : Shape.CoroSizes) { |
| CS->replaceAllUsesWith(SizeConstant); |
| CS->eraseFromParent(); |
| } |
| } |
| |
| static void postSplitCleanup(Function &F) { |
| removeUnreachableBlocks(F); |
| |
| #ifndef NDEBUG |
| // For now, we do a mandatory verification step because we don't |
| // entirely trust this pass. Note that we don't want to add a verifier |
| // pass to FPM below because it will also verify all the global data. |
| if (verifyFunction(F, &errs())) |
| report_fatal_error("Broken function"); |
| #endif |
| } |
| |
| // The coroutine has no suspend points. Remove the heap allocation for the |
| // coroutine frame if possible. |
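| // |
| // For illustration (a hand-written example, not actual pass output), in the |
| // switch ABI with an outstanding coro.alloc this turns |
| // |
| //   %alloc = call i1 @llvm.coro.alloc(token %id) |
| //   ... |
| //   %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem) |
| // |
| // into roughly |
| // |
| //   %frame = alloca %f.Frame, align <FrameAlign>  ; %alloc folded to false, |
| //   ...                                           ; uses of %hdl use %frame |
| // |
| // and the matching coro.free calls are replaced with null so that no |
| // deallocation code remains. |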
| static void handleNoSuspendCoroutine(coro::Shape &Shape) { |
| auto *CoroBegin = Shape.CoroBegin; |
| switch (Shape.ABI) { |
| case coro::ABI::Switch: { |
| auto SwitchId = Shape.getSwitchCoroId(); |
| auto *AllocInst = SwitchId->getCoroAlloc(); |
| coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); |
| if (AllocInst) { |
| IRBuilder<> Builder(AllocInst); |
| auto *Frame = Builder.CreateAlloca(Shape.FrameTy); |
| Frame->setAlignment(Shape.FrameAlign); |
| AllocInst->replaceAllUsesWith(Builder.getFalse()); |
| AllocInst->eraseFromParent(); |
| CoroBegin->replaceAllUsesWith(Frame); |
| } else { |
| CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); |
| } |
| |
| break; |
| } |
| case coro::ABI::Async: |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: |
| CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType())); |
| break; |
| } |
| |
| CoroBegin->eraseFromParent(); |
| Shape.CoroBegin = nullptr; |
| } |
| |
| // SimplifySuspendPoint needs to check that there are no calls between |
| // coro.save and coro.suspend, since any of those calls may potentially resume |
| // the coroutine, in which case we cannot eliminate the suspend point. |
| static bool hasCallsInBlockBetween(iterator_range<BasicBlock::iterator> R) { |
| for (Instruction &I : R) { |
| // Assume that no intrinsic can resume the coroutine. |
| if (isa<IntrinsicInst>(I)) |
| continue; |
| |
| if (isa<CallBase>(I)) |
| return true; |
| } |
| return false; |
| } |
| |
| static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) { |
| SmallPtrSet<BasicBlock *, 8> Set; |
| SmallVector<BasicBlock *, 8> Worklist; |
| |
| Set.insert(SaveBB); |
| Worklist.push_back(ResDesBB); |
| |
| // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr |
| // returns a token consumed by the suspend instruction, all blocks in between |
| // will eventually hit SaveBB when walking backwards from ResDesBB. |
| while (!Worklist.empty()) { |
| auto *BB = Worklist.pop_back_val(); |
| Set.insert(BB); |
| for (auto *Pred : predecessors(BB)) |
| if (!Set.contains(Pred)) |
| Worklist.push_back(Pred); |
| } |
| |
| // SaveBB and ResDesBB are checked separately in hasCallsBetween. |
| Set.erase(SaveBB); |
| Set.erase(ResDesBB); |
| |
| for (auto *BB : Set) |
| if (hasCallsInBlockBetween({BB->getFirstNonPHIIt(), BB->end()})) |
| return true; |
| |
| return false; |
| } |
| |
| static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) { |
| auto *SaveBB = Save->getParent(); |
| auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent(); |
| BasicBlock::iterator SaveIt = Save->getIterator(); |
| BasicBlock::iterator ResumeOrDestroyIt = ResumeOrDestroy->getIterator(); |
| |
| if (SaveBB == ResumeOrDestroyBB) |
| return hasCallsInBlockBetween({std::next(SaveIt), ResumeOrDestroyIt}); |
| |
| // Any calls from Save to the end of the block? |
| if (hasCallsInBlockBetween({std::next(SaveIt), SaveBB->end()})) |
| return true; |
| |
| // Any calls from the beginning of the block up to ResumeOrDestroy? |
| if (hasCallsInBlockBetween( |
| {ResumeOrDestroyBB->getFirstNonPHIIt(), ResumeOrDestroyIt})) |
| return true; |
| |
| // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB? |
| if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB)) |
| return true; |
| |
| return false; |
| } |
| |
| // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the |
| // suspend point and replace it with normal control flow. |
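| // |
| // For illustration (a hand-written example, not actual pass output): |
| // |
| //   %save = call token @llvm.coro.save(ptr %hdl) |
| //   %addr = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)  ; 0 selects resume |
| //   call fastcc void %addr(ptr %hdl) |
| //   %susp = call i8 @llvm.coro.suspend(token %save, i1 false) |
| // |
| // Here the suspend can be replaced with the constant index 0, sending control |
| // directly down the resume path, and the save, subfn.addr and resume call are |
| // erased. |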
| static bool simplifySuspendPoint(CoroSuspendInst *Suspend, |
| CoroBeginInst *CoroBegin) { |
| Instruction *Prev = Suspend->getPrevNode(); |
| if (!Prev) { |
| auto *Pred = Suspend->getParent()->getSinglePredecessor(); |
| if (!Pred) |
| return false; |
| Prev = Pred->getTerminator(); |
| } |
| |
| CallBase *CB = dyn_cast<CallBase>(Prev); |
| if (!CB) |
| return false; |
| |
| auto *Callee = CB->getCalledOperand()->stripPointerCasts(); |
| |
| // See if the callsite is for resumption or destruction of the coroutine. |
| auto *SubFn = dyn_cast<CoroSubFnInst>(Callee); |
| if (!SubFn) |
| return false; |
| |
| // If it does not refer to the current coroutine, we cannot do anything with it. |
| if (SubFn->getFrame() != CoroBegin) |
| return false; |
| |
| // See if the transformation is safe. Specifically, see if there are any |
| // calls in between Save and CallInstr. They can potentially resume the |
| // coroutine, rendering this optimization unsafe. |
| auto *Save = Suspend->getCoroSave(); |
| if (hasCallsBetween(Save, CB)) |
| return false; |
| |
| // Replace llvm.coro.suspend with the value that results in resumption over |
| // the resume or cleanup path. |
| Suspend->replaceAllUsesWith(SubFn->getRawIndex()); |
| Suspend->eraseFromParent(); |
| Save->eraseFromParent(); |
| |
| // No longer need a call to coro.resume or coro.destroy. |
| if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { |
| BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator()); |
| } |
| |
| // Grab the CalledValue from CB before erasing the CallInstr. |
| auto *CalledValue = CB->getCalledOperand(); |
| CB->eraseFromParent(); |
| |
| // If it has no more users, remove it. Usually it is a bitcast of SubFn. |
| if (CalledValue != SubFn && CalledValue->user_empty()) |
| if (auto *I = dyn_cast<Instruction>(CalledValue)) |
| I->eraseFromParent(); |
| |
| // Now we are good to remove SubFn. |
| if (SubFn->user_empty()) |
| SubFn->eraseFromParent(); |
| |
| return true; |
| } |
| |
| // Remove suspend points that are simplified. |
| static void simplifySuspendPoints(coro::Shape &Shape) { |
| // Currently, the only simplification we do is switch-lowering-specific. |
| if (Shape.ABI != coro::ABI::Switch) |
| return; |
| |
| auto &S = Shape.CoroSuspends; |
| size_t I = 0, N = S.size(); |
| if (N == 0) |
| return; |
| |
| size_t ChangedFinalIndex = std::numeric_limits<size_t>::max(); |
| while (true) { |
| auto SI = cast<CoroSuspendInst>(S[I]); |
| // Leave final.suspend to handleFinalSuspend since it is undefined behavior |
| // to resume a coroutine suspended at the final suspend point. |
| if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) { |
| if (--N == I) |
| break; |
| |
| std::swap(S[I], S[N]); |
| |
| if (cast<CoroSuspendInst>(S[I])->isFinal()) { |
| assert(Shape.SwitchLowering.HasFinalSuspend); |
| ChangedFinalIndex = I; |
| } |
| |
| continue; |
| } |
| if (++I == N) |
| break; |
| } |
| S.resize(N); |
| |
| // Maintain the final suspend at the end in case it was swapped, because we |
| // require the final suspend to be the last element of CoroSuspends. |
| if (ChangedFinalIndex < N) { |
| assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal()); |
| std::swap(S[ChangedFinalIndex], S.back()); |
| } |
| } |
| |
| namespace { |
| |
| struct SwitchCoroutineSplitter { |
| static void split(Function &F, coro::Shape &Shape, |
| SmallVectorImpl<Function *> &Clones, |
| TargetTransformInfo &TTI) { |
| assert(Shape.ABI == coro::ABI::Switch); |
| |
| MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)}; |
| |
| // Create a resume clone by cloning the body of the original function, |
| // setting a new entry block and replacing coro.suspend with an appropriate |
| // value to force the resume or cleanup path at every suspend point. |
| createResumeEntryBlock(F, Shape); |
| auto *ResumeClone = coro::SwitchCloner::createClone( |
| F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI, |
| CommonDebugInfo); |
| auto *DestroyClone = coro::SwitchCloner::createClone( |
| F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI, |
| CommonDebugInfo); |
| auto *CleanupClone = coro::SwitchCloner::createClone( |
| F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI, |
| CommonDebugInfo); |
| |
| postSplitCleanup(*ResumeClone); |
| postSplitCleanup(*DestroyClone); |
| postSplitCleanup(*CleanupClone); |
| |
| // Store addresses resume/destroy/cleanup functions in the coroutine frame. |
| updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); |
| |
| assert(Clones.empty()); |
| Clones.push_back(ResumeClone); |
| Clones.push_back(DestroyClone); |
| Clones.push_back(CleanupClone); |
| |
| // Create a constant array referring to the resume/destroy/cleanup functions, |
| // pointed to by the last argument of @llvm.coro.info, so that the CoroElide |
| // pass can determine the correct function to call. |
| setCoroInfo(F, Shape, Clones); |
| } |
| |
| // Create a variant of the ramp function that does not perform heap allocation |
| // for a switch ABI coroutine. |
| // |
| // The newly split `.noalloc` ramp function has the following differences: |
| //  - Has one additional frame pointer parameter in lieu of dynamic |
| //    allocation. |
| //  - Suppresses allocations by replacing coro.alloc and coro.free. |
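| // |
| // For illustration (a hand-written example, not actual pass output), a ramp |
| // function |
| // |
| //   define ptr @f(i32 %n) |
| // |
| // gets a variant of roughly the form |
| // |
| //   define internal ptr @f.noalloc(i32 %n, ptr nonnull align <FrameAlign> |
| //                                  dereferenceable(<FrameSize>) %frame) |
| // |
| // in which coro.alloc is folded to false, coro.free is replaced with null, |
| // and coro.begin is replaced by the trailing %frame argument. |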
| static Function *createNoAllocVariant(Function &F, coro::Shape &Shape, |
| SmallVectorImpl<Function *> &Clones) { |
| assert(Shape.ABI == coro::ABI::Switch); |
| auto *OrigFnTy = F.getFunctionType(); |
| auto OldParams = OrigFnTy->params(); |
| |
| SmallVector<Type *> NewParams; |
| NewParams.reserve(OldParams.size() + 1); |
| NewParams.append(OldParams.begin(), OldParams.end()); |
| NewParams.push_back(PointerType::getUnqual(Shape.FrameTy->getContext())); |
| |
| auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams, |
| OrigFnTy->isVarArg()); |
| Function *NoAllocF = |
| Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc"); |
| |
| ValueToValueMapTy VMap; |
| unsigned int Idx = 0; |
| for (const auto &I : F.args()) { |
| VMap[&I] = NoAllocF->getArg(Idx++); |
| } |
| // We just appended the frame pointer as the last argument of the new |
| // function. |
| auto FrameIdx = NoAllocF->arg_size() - 1; |
| SmallVector<ReturnInst *, 4> Returns; |
| CloneFunctionInto(NoAllocF, &F, VMap, |
| CloneFunctionChangeType::LocalChangesOnly, Returns); |
| |
| if (Shape.CoroBegin) { |
| auto *NewCoroBegin = |
| cast_if_present<CoroBeginInst>(VMap[Shape.CoroBegin]); |
| auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId()); |
| coro::replaceCoroFree(NewCoroId, /*Elide=*/true); |
| coro::suppressCoroAllocs(NewCoroId); |
| NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(FrameIdx)); |
| NewCoroBegin->eraseFromParent(); |
| } |
| |
| Module *M = F.getParent(); |
| M->getFunctionList().insert(M->end(), NoAllocF); |
| |
| removeUnreachableBlocks(*NoAllocF); |
| auto NewAttrs = NoAllocF->getAttributes(); |
| // When we elide allocation, we read these attributes to determine the |
| // frame size and alignment. |
| addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx, |
| Shape.FrameSize, Shape.FrameAlign, |
| /*NoAlias=*/false); |
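| // E.g. (sketch): the frame parameter ends up annotated roughly as |
| //   ptr noundef nonnull align <FrameAlign> dereferenceable(<FrameSize>) %frame |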
| |
| NoAllocF->setAttributes(NewAttrs); |
| |
| Clones.push_back(NoAllocF); |
| // Reset the original function's coro info so that the new noalloc variant is |
| // connected to the original ramp function. |
| setCoroInfo(F, Shape, Clones); |
| // After copying, set the linkage to internal linkage. The original function |
| // may have a different linkage, but any optimization dependent on this |
| // function generally relies on LTO. |
| NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage); |
| return NoAllocF; |
| } |
| |
| private: |
| // Create an entry block for a resume function with a switch that will jump to |
| // suspend points. |
| static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { |
| LLVMContext &C = F.getContext(); |
| |
| // resume.entry: |
| //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, |
| //                                        i32 0, i32 2 |
| //   %index = load i32, i32* %index.addr |
| //   switch i32 %index, label %unreachable [ |
| //     i32 0, label %resume.0 |
| //     i32 1, label %resume.1 |
| //     ... |
| //   ] |
| |
| auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F); |
| auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F); |
| |
| IRBuilder<> Builder(NewEntry); |
| auto *FramePtr = Shape.FramePtr; |
| auto *FrameTy = Shape.FrameTy; |
| auto *GepIndex = Builder.CreateStructGEP( |
| FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); |
| auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); |
| auto *Switch = |
| Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); |
| Shape.SwitchLowering.ResumeSwitch = Switch; |
| |
| size_t SuspendIndex = 0; |
| for (auto *AnyS : Shape.CoroSuspends) { |
| auto *S = cast<CoroSuspendInst>(AnyS); |
| ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); |
| |
| // Replace CoroSave with a store to Index: |
| // %index.addr = getelementptr %f.frame... (index field number) |
| // store i32 %IndexVal, i32* %index.addr1 |
| auto *Save = S->getCoroSave(); |
| Builder.SetInsertPoint(Save); |
| if (S->isFinal()) { |
| // The coroutine should be marked done if it reaches the final suspend |
| // point. |
| markCoroutineAsDone(Builder, Shape, FramePtr); |
| } else { |
| auto *GepIndex = Builder.CreateStructGEP( |
| FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); |
| Builder.CreateStore(IndexVal, GepIndex); |
| } |
| |
| Save->replaceAllUsesWith(ConstantTokenNone::get(C)); |
| Save->eraseFromParent(); |
| |
| // Split block before and after coro.suspend and add a jump from an entry |
| // switch: |
| // |
| // whateverBB: |
| // whatever |
| // %0 = call i8 @llvm.coro.suspend(token none, i1 false) |
| // switch i8 %0, label %suspend [i8 0, label %resume |
| // i8 1, label %cleanup] |
| // becomes: |
| // |
| // whateverBB: |
| // whatever |
| // br label %resume.0.landing |
| // |
| // resume.0: ; <--- jump from the switch in the resume.entry |
| // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) |
| // br label %resume.0.landing |
| // |
| // resume.0.landing: |
| // %1 = phi i8 [-1, %whateverBB], [%0, %resume.0] |
| // switch i8 %1, label %suspend [i8 0, label %resume |
| // i8 1, label %cleanup] |
| |
| auto *SuspendBB = S->getParent(); |
| auto *ResumeBB = |
| SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex)); |
| auto *LandingBB = ResumeBB->splitBasicBlock( |
| S->getNextNode(), ResumeBB->getName() + Twine(".landing")); |
| Switch->addCase(IndexVal, ResumeBB); |
| |
| cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB); |
| auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, ""); |
| PN->insertBefore(LandingBB->begin()); |
| S->replaceAllUsesWith(PN); |
| PN->addIncoming(Builder.getInt8(-1), SuspendBB); |
| PN->addIncoming(S, ResumeBB); |
| |
| ++SuspendIndex; |
| } |
| |
| Builder.SetInsertPoint(UnreachBB); |
| Builder.CreateUnreachable(); |
| |
| Shape.SwitchLowering.ResumeEntryBlock = NewEntry; |
| } |
| |
| // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. |
| static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, |
| Function *DestroyFn, Function *CleanupFn) { |
| IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr()); |
| |
| auto *ResumeAddr = Builder.CreateStructGEP( |
| Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, |
| "resume.addr"); |
| Builder.CreateStore(ResumeFn, ResumeAddr); |
| |
| Value *DestroyOrCleanupFn = DestroyFn; |
| |
| CoroIdInst *CoroId = Shape.getSwitchCoroId(); |
| if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { |
| // If there is a CoroAlloc and it returns false (meaning we elide the |
| // allocation), use CleanupFn instead of DestroyFn. |
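| // The resulting IR looks roughly like (sketch): |
| //   %destroy.or.cleanup = select i1 %coro.alloc, ptr @f.destroy, ptr @f.cleanup |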
| DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); |
| } |
| |
| auto *DestroyAddr = Builder.CreateStructGEP( |
| Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, |
| "destroy.addr"); |
| Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); |
| } |
| |
| // Create a global constant array containing pointers to the functions |
| // provided and set the Info argument of the coro.id to point at this |
| // constant. Example: |
| // |
| // @f.resumers = internal constant [2 x void(%f.frame*)*] |
| // [void(%f.frame*)* @f.resume, void(%f.frame*)* |
| // @f.destroy] |
| // define void @f() { |
| // ... |
| // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, |
| // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to |
| // i8*)) |
| // |
| // Assumes that all the functions have the same signature. |
| static void setCoroInfo(Function &F, coro::Shape &Shape, |
| ArrayRef<Function *> Fns) { |
| // This only works under the switch-lowering ABI because coro elision |
| // only works on the switch-lowering ABI. |
| SmallVector<Constant *, 4> Args(Fns); |
| assert(!Args.empty()); |
| Function *Part = *Fns.begin(); |
| Module *M = Part->getParent(); |
| auto *ArrTy = ArrayType::get(Part->getType(), Args.size()); |
| |
| auto *ConstVal = ConstantArray::get(ArrTy, Args); |
| auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true, |
| GlobalVariable::PrivateLinkage, ConstVal, |
| F.getName() + Twine(".resumers")); |
| |
| // Update coro.begin instruction to refer to this constant. |
| LLVMContext &C = F.getContext(); |
| auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C)); |
| Shape.getSwitchCoroId()->setInfo(BC); |
| } |
| }; |
| |
| } // namespace |
| |
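| // Replace the llvm.coro.async.resume intrinsic associated with \p Suspend |
| // with the given continuation function, and poison out the suspend's |
| // resume-function operand. |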
| static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend, |
| Value *Continuation) { |
| auto *ResumeIntrinsic = Suspend->getResumeFunction(); |
| auto &Context = Suspend->getParent()->getParent()->getContext(); |
| auto *Int8PtrTy = PointerType::getUnqual(Context); |
| |
| IRBuilder<> Builder(ResumeIntrinsic); |
| auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy); |
| ResumeIntrinsic->replaceAllUsesWith(Val); |
| ResumeIntrinsic->eraseFromParent(); |
| Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg, |
| PoisonValue::get(Int8PtrTy)); |
| } |
| |
| /// Coerce the arguments in \p FnArgs according to \p FnTy, storing the |
| /// results in \p CallArgs. |
| static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy, |
| ArrayRef<Value *> FnArgs, |
| SmallVectorImpl<Value *> &CallArgs) { |
| size_t ArgIdx = 0; |
| for (auto *paramTy : FnTy->params()) { |
| assert(ArgIdx < FnArgs.size()); |
| if (paramTy != FnArgs[ArgIdx]->getType()) |
| CallArgs.push_back( |
| Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy)); |
| else |
| CallArgs.push_back(FnArgs[ArgIdx]); |
| ++ArgIdx; |
| } |
| } |
| |
| CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, |
| TargetTransformInfo &TTI, |
| ArrayRef<Value *> Arguments, |
| IRBuilder<> &Builder) { |
| auto *FnTy = MustTailCallFn->getFunctionType(); |
| // Coerce the arguments: LLVM optimizations seem to ignore the types in |
| // vararg functions and throw away casts in optimized mode. |
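| // For instance (sketch), an integer argument passed where the callee expects |
| // a pointer is coerced via inttoptr before the musttail call: |
| //   %coerced = inttoptr i64 %arg to ptr |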
| SmallVector<Value *, 8> CallArgs; |
| coerceArguments(Builder, FnTy, Arguments, CallArgs); |
| |
| auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs); |
| // Skip targets which don't support tail calls. |
| if (TTI.supportsTailCallFor(TailCall)) { |
| TailCall->setTailCallKind(CallInst::TCK_MustTail); |
| } |
| TailCall->setDebugLoc(Loc); |
| TailCall->setCallingConv(MustTailCallFn->getCallingConv()); |
| return TailCall; |
| } |
| |
| void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, |
| SmallVectorImpl<Function *> &Clones, |
| TargetTransformInfo &TTI) { |
| assert(Shape.ABI == coro::ABI::Async); |
| assert(Clones.empty()); |
| // Reset various things that the optimizer might have decided it |
| // "knows" about the coroutine function due to not seeing a return. |
| F.removeFnAttr(Attribute::NoReturn); |
| F.removeRetAttr(Attribute::NoAlias); |
| F.removeRetAttr(Attribute::NonNull); |
| |
| auto &Context = F.getContext(); |
| auto *Int8PtrTy = PointerType::getUnqual(Context); |
| |
| auto *Id = Shape.getAsyncCoroId(); |
| IRBuilder<> Builder(Id); |
| |
| auto *FramePtr = Id->getStorage(); |
| FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy); |
| FramePtr = Builder.CreateConstInBoundsGEP1_32( |
| Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset, |
| "async.ctx.frameptr"); |
| |
| // Map all uses of llvm.coro.begin to the allocated frame pointer. |
| { |
| // Make sure we don't invalidate Shape.FramePtr. |
| TrackingVH<Value> Handle(Shape.FramePtr); |
| Shape.CoroBegin->replaceAllUsesWith(FramePtr); |
| Shape.FramePtr = Handle.getValPtr(); |
| } |
| |
| // Create all the functions in order after the main function. |
| auto NextF = std::next(F.getIterator()); |
| |
| // Create a continuation function for each of the suspend points. |
| Clones.reserve(Shape.CoroSuspends.size()); |
| for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { |
| auto *Suspend = cast<CoroSuspendAsyncInst>(CS); |
| |
| // Create the clone declaration. |
| auto ResumeNameSuffix = ".resume."; |
| auto ProjectionFunctionName = |
| Suspend->getAsyncContextProjectionFunction()->getName(); |
| bool UseSwiftMangling = false; |
| if (ProjectionFunctionName == "__swift_async_resume_project_context") { |
| ResumeNameSuffix = "TQ"; |
| UseSwiftMangling = true; |
| } else if (ProjectionFunctionName == "__swift_async_resume_get_context") { |
| ResumeNameSuffix = "TY"; |
| UseSwiftMangling = true; |
| } |
| auto *Continuation = createCloneDeclaration( |
| F, Shape, |
| UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_" |
| : ResumeNameSuffix + Twine(Idx), |
| NextF, Suspend); |
| Clones.push_back(Continuation); |
| |
| // Insert a branch to a new return block immediately before the suspend |
| // point. |
| auto *SuspendBB = Suspend->getParent(); |
| auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); |
| auto *Branch = cast<BranchInst>(SuspendBB->getTerminator()); |
| |
| // Place it before the split-off suspend block. |
| auto *ReturnBB = |
| BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB); |
| Branch->setSuccessor(0, ReturnBB); |
| |
| IRBuilder<> Builder(ReturnBB); |
| |
| // Insert the call to the tail call function and inline it. |
| auto *Fn = Suspend->getMustTailCallFunction(); |
| SmallVector<Value *, 8> Args(Suspend->args()); |
| auto FnArgs = ArrayRef<Value *>(Args).drop_front( |
| CoroSuspendAsyncInst::MustTailCallFuncArg + 1); |
| auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI, |
| FnArgs, Builder); |
| Builder.CreateRetVoid(); |
| InlineFunctionInfo FnInfo; |
| (void)InlineFunction(*TailCall, FnInfo); |
| |
| // Replace the llvm.coro.async.resume intrinsic call. |
| replaceAsyncResumeFunction(Suspend, Continuation); |
| } |
| |
| assert(Clones.size() == Shape.CoroSuspends.size()); |
| |
| MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)}; |
| |
| for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { |
| auto *Suspend = CS; |
| auto *Clone = Clones[Idx]; |
| |
| coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, |
| Suspend, TTI, CommonDebugInfo); |
| } |
| } |
| |
| void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, |
| SmallVectorImpl<Function *> &Clones, |
| TargetTransformInfo &TTI) { |
| assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); |
| assert(Clones.empty()); |
| |
| // Reset various things that the optimizer might have decided it |
| // "knows" about the coroutine function due to not seeing a return. |
| F.removeFnAttr(Attribute::NoReturn); |
| F.removeRetAttr(Attribute::NoAlias); |
| F.removeRetAttr(Attribute::NonNull); |
| |
| // Allocate the frame. |
| auto *Id = Shape.getRetconCoroId(); |
| Value *RawFramePtr; |
| if (Shape.RetconLowering.IsFrameInlineInStorage) { |
| RawFramePtr = Id->getStorage(); |
| } else { |
| IRBuilder<> Builder(Id); |
| |
| // Determine the size of the frame. |
| const DataLayout &DL = F.getDataLayout(); |
| auto Size = DL.getTypeAllocSize(Shape.FrameTy); |
| |
| // Allocate. We don't need to update the call graph node because we're |
| // going to recompute it from scratch after splitting. |
| // FIXME: pass the required alignment |
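| // E.g. (sketch): emitAlloc typically emits a call to the allocator function |
| // supplied to llvm.coro.id.retcon, roughly: |
| //   %raw = call ptr @my_alloc(i64 <frame size>) |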
| RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); |
| RawFramePtr = |
| Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); |
| |
| // Stash the allocated frame pointer in the continuation storage. |
| Builder.CreateStore(RawFramePtr, Id->getStorage()); |
| } |
| |
| // Map all uses of llvm.coro.begin to the allocated frame pointer. |
| { |
| // Make sure we don't invalidate Shape.FramePtr. |
| TrackingVH<Value> Handle(Shape.FramePtr); |
| Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); |
| Shape.FramePtr = Handle.getValPtr(); |
| } |
| |
| // Create a unique return block. |
| BasicBlock *ReturnBB = nullptr; |
| PHINode *ContinuationPhi = nullptr; |
| SmallVector<PHINode *, 4> ReturnPHIs; |
| |
| // Create all the functions in order after the main function. |
| auto NextF = std::next(F.getIterator()); |
| |
| // Create a continuation function for each of the suspend points. |
| Clones.reserve(Shape.CoroSuspends.size()); |
| for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { |
| auto Suspend = cast<CoroSuspendRetconInst>(CS); |
| |
| // Create the clone declaration. |
| auto Continuation = createCloneDeclaration( |
| F, Shape, ".resume." + Twine(Idx), NextF, nullptr); |
| Clones.push_back(Continuation); |
| |
| // Insert a branch to the unified return block immediately before |
| // the suspend point. |
| auto SuspendBB = Suspend->getParent(); |
| auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); |
| auto Branch = cast<BranchInst>(SuspendBB->getTerminator()); |
| |
| // Create the unified return block. |
| if (!ReturnBB) { |
| // Place it before the first suspend. |
| ReturnBB = |
| BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB); |
| Shape.RetconLowering.ReturnBlock = ReturnBB; |
| |
| IRBuilder<> Builder(ReturnBB); |
| |
| // First, the continuation. |
| ContinuationPhi = |
| Builder.CreatePHI(Continuation->getType(), Shape.CoroSuspends.size()); |
| |
| // Create PHIs for all other return values. |
| assert(ReturnPHIs.empty()); |
| |
| // Next, all the directly-yielded values. |
| for (auto *ResultTy : Shape.getRetconResultTypes()) |
| ReturnPHIs.push_back( |
| Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size())); |
| |
| // Build the return value. |
| auto RetTy = F.getReturnType(); |
| |
| // Cast the continuation value if necessary. |
| // We can't rely on the types matching up because that type would |
| // have to be infinite. |
| auto CastedContinuationTy = |
| (ReturnPHIs.empty() ? RetTy : RetTy->getStructElementType(0)); |
| auto *CastedContinuation = |
| Builder.CreateBitCast(ContinuationPhi, CastedContinuationTy); |
| |
| Value *RetV = CastedContinuation; |
| if (!ReturnPHIs.empty()) { |
| auto ValueIdx = 0; |
| RetV = PoisonValue::get(RetTy); |
| RetV = Builder.CreateInsertValue(RetV, CastedContinuation, ValueIdx++); |
| |
| for (auto Phi : ReturnPHIs) |
| RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++); |
| } |
| |
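| // E.g. (sketch) for a retcon coroutine whose ramp returns { ptr, i32 }: |
| //   %agg0 = insertvalue { ptr, i32 } poison, ptr %cont.cast, 0 |
| //   %agg1 = insertvalue { ptr, i32 } %agg0, i32 %yield, 1 |
| //   ret { ptr, i32 } %agg1 |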
| Builder.CreateRet(RetV); |
| } |
| |
| // Branch to the return block. |
| Branch->setSuccessor(0, ReturnBB); |
| assert(ContinuationPhi); |
| ContinuationPhi->addIncoming(Continuation, SuspendBB); |
| for (auto [Phi, VUse] : |
| llvm::zip_equal(ReturnPHIs, Suspend->value_operands())) |
| Phi->addIncoming(VUse, SuspendBB); |
| } |
| |
| assert(Clones.size() == Shape.CoroSuspends.size()); |
| |
| MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)}; |
| |
| for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { |
| auto Suspend = CS; |
| auto Clone = Clones[Idx]; |
| |
| coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, |
| Suspend, TTI, CommonDebugInfo); |
| } |
| } |
| |
| namespace { |
| class PrettyStackTraceFunction : public PrettyStackTraceEntry { |
| Function &F; |
| |
| public: |
| PrettyStackTraceFunction(Function &F) : F(F) {} |
| void print(raw_ostream &OS) const override { |
| OS << "While splitting coroutine "; |
| F.printAsOperand(OS, /*print type*/ false, F.getParent()); |
| OS << "\n"; |
| } |
| }; |
| } // namespace |
| |
| /// Remove calls to llvm.coro.end in the original function. |
| static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { |
| if (Shape.ABI != coro::ABI::Switch) { |
| for (auto *End : Shape.CoroEnds) { |
| replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); |
| } |
| } else { |
| for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { |
| auto &Context = End->getContext(); |
| End->replaceAllUsesWith(ConstantInt::getFalse(Context)); |
| End->eraseFromParent(); |
| } |
| } |
| } |
| |
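| /// Check whether \p F has a call site that sits inside a presplit coroutine |
| /// and carries the CoroElideSafe attribute; such a caller makes it worthwhile |
| /// to emit the `.noalloc` ramp variant used for heap-allocation elision. |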
| static bool hasSafeElideCaller(Function &F) { |
| for (auto *U : F.users()) { |
| if (auto *CB = dyn_cast<CallBase>(U)) { |
| auto *Caller = CB->getFunction(); |
| if (Caller && Caller->isPresplitCoroutine() && |
| CB->hasFnAttr(llvm::Attribute::CoroElideSafe)) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape, |
| SmallVectorImpl<Function *> &Clones, |
| TargetTransformInfo &TTI) { |
| SwitchCoroutineSplitter::split(F, Shape, Clones, TTI); |
| } |
| |
| static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, |
| coro::BaseABI &ABI, TargetTransformInfo &TTI, |
| bool OptimizeFrame) { |
| PrettyStackTraceFunction prettyStackTrace(F); |
| |
| auto &Shape = ABI.Shape; |
| assert(Shape.CoroBegin); |
| |
| lowerAwaitSuspends(F, Shape); |
| |
| simplifySuspendPoints(Shape); |
| |
| normalizeCoroutine(F, Shape, TTI); |
| ABI.buildCoroutineFrame(OptimizeFrame); |
| replaceFrameSizeAndAlignment(Shape); |
| |
| bool isNoSuspendCoroutine = Shape.CoroSuspends.empty(); |
| |
| bool shouldCreateNoAllocVariant = |
| !isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch && |
| hasSafeElideCaller(F) && !F.hasFnAttribute(llvm::Attribute::NoInline); |
| |
| // If there are no suspend points, no split is required; just remove |
| // the allocation and deallocation blocks, as they are not needed. |
| if (isNoSuspendCoroutine) { |
| handleNoSuspendCoroutine(Shape); |
| } else { |
| ABI.splitCoroutine(F, Shape, Clones, TTI); |
| } |
| |
| // Replace all the swifterror operations in the original function. |
| // This invalidates SwiftErrorOps in the Shape. |
| replaceSwiftErrorOps(F, Shape, nullptr); |
| |
| // Salvage debug intrinsics that point into the coroutine frame in the |
| // original function. The Cloner has already salvaged debug info in the new |
| // coroutine funclets. |
| SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; |
| auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F); |
| for (auto *DDI : DbgInsts) |
| coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/); |
| for (DbgVariableRecord *DVR : DbgVariableRecords) |
| coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/); |
| |
| removeCoroEndsFromRampFunction(Shape); |
| |
| if (shouldCreateNoAllocVariant) |
| SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones); |
| } |
| |
| static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit( |
| LazyCallGraph::Node &N, const coro::Shape &Shape, |
| const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, |
| LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, |
| FunctionAnalysisManager &FAM) { |
| |
| auto *CurrentSCC = &C; |
| if (!Clones.empty()) { |
| switch (Shape.ABI) { |
| case coro::ABI::Switch: |
| // Each clone in the Switch lowering is independent of the other clones. |
| // Let the LazyCallGraph know about each one separately. |
| for (Function *Clone : Clones) |
| CG.addSplitFunction(N.getFunction(), *Clone); |
| break; |
| case coro::ABI::Async: |
| case coro::ABI::Retcon: |
| case coro::ABI::RetconOnce: |
| // Each clone in the Async/Retcon lowering references the other clones. |
| // Let the LazyCallGraph know about all of them at once. |
| if (!Clones.empty()) |
| CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones); |
| break; |
| } |
| |
| // Let the CGSCC infra handle the changes to the original function. |
| CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N, AM, |
| UR, FAM); |
| } |
| |
| // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges |
| // to the split functions. |
| postSplitCleanup(N.getFunction()); |
| CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N, |
| AM, UR, FAM); |
| return *CurrentSCC; |
| } |
| |
| /// Replace a call to llvm.coro.prepare.retcon. |
| static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, |
| LazyCallGraph::SCC &C) { |
| auto CastFn = Prepare->getArgOperand(0); // as an i8* |
| auto Fn = CastFn->stripPointerCasts(); // as its original type |
| |
| // Attempt to peephole this pattern: |
| // %0 = bitcast [[TYPE]] @some_function to i8* |
| // %1 = call @llvm.coro.prepare.retcon(i8* %0) |
| // %2 = bitcast %1 to [[TYPE]] |
| // ==> |
| // %2 = @some_function |
| for (Use &U : llvm::make_early_inc_range(Prepare->uses())) { |
| // Look for bitcasts back to the original function type. |
| auto *Cast = dyn_cast<BitCastInst>(U.getUser()); |
| if (!Cast || Cast->getType() != Fn->getType()) |
| continue; |
| |
| // Replace and remove the cast. |
| Cast->replaceAllUsesWith(Fn); |
| Cast->eraseFromParent(); |
| } |
| |
| // Replace any remaining uses with the function as an i8*. |
| // This can never directly be a callee, so we don't need to update CG. |
| Prepare->replaceAllUsesWith(CastFn); |
| Prepare->eraseFromParent(); |
| |
| // Kill dead bitcasts. |
| while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) { |
| if (!Cast->use_empty()) |
| break; |
| CastFn = Cast->getOperand(0); |
| Cast->eraseFromParent(); |
| } |
| } |
| |
| static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, |
| LazyCallGraph::SCC &C) { |
| bool Changed = false; |
| for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) { |
| // Intrinsics can only be used in calls. |
| auto *Prepare = cast<CallInst>(P.getUser()); |
| replacePrepare(Prepare, CG, C); |
| Changed = true; |
| } |
| |
| return Changed; |
| } |
| |
| static void addPrepareFunction(const Module &M, |
| SmallVectorImpl<Function *> &Fns, |
| StringRef Name) { |
| auto *PrepareFn = M.getFunction(Name); |
| if (PrepareFn && !PrepareFn->use_empty()) |
| Fns.push_back(PrepareFn); |
| } |
| |
| static std::unique_ptr<coro::BaseABI> |
| CreateNewABI(Function &F, coro::Shape &S, |
| std::function<bool(Instruction &)> IsMatCallback, |
| const SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs) { |
| if (S.CoroBegin->hasCustomABI()) { |
| unsigned CustomABI = S.CoroBegin->getCustomABI(); |
| if (CustomABI >= GenCustomABIs.size()) |
| llvm_unreachable("Custom ABI not found amoung those specified"); |
| return GenCustomABIs[CustomABI](F, S); |
| } |
| |
| switch (S.ABI) { |
| case coro::ABI::Switch: |
| return std::make_unique<coro::SwitchABI>(F, S, IsMatCallback); |
| case coro::ABI::Async: |
| return std::make_unique<coro::AsyncABI>(F, S, IsMatCallback); |
| case coro::ABI::Retcon: |
| return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback); |
| case coro::ABI::RetconOnce: |
| return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback); |
| } |
| llvm_unreachable("Unknown ABI"); |
| } |
| |
| CoroSplitPass::CoroSplitPass(bool OptimizeFrame) |
| : CreateAndInitABI([](Function &F, coro::Shape &S) { |
| std::unique_ptr<coro::BaseABI> ABI = |
| CreateNewABI(F, S, coro::isTriviallyMaterializable, {}); |
| ABI->init(); |
| return ABI; |
| }), |
| OptimizeFrame(OptimizeFrame) {} |
| |
| CoroSplitPass::CoroSplitPass( |
| SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame) |
| : CreateAndInitABI([=](Function &F, coro::Shape &S) { |
| std::unique_ptr<coro::BaseABI> ABI = |
| CreateNewABI(F, S, coro::isTriviallyMaterializable, GenCustomABIs); |
| ABI->init(); |
| return ABI; |
| }), |
| OptimizeFrame(OptimizeFrame) {} |
| |
| // For backward compatibility, this constructor takes a materializable |
| // callback and creates a generator for an ABI with a modified materializable |
| // callback. |
| CoroSplitPass::CoroSplitPass(std::function<bool(Instruction &)> IsMatCallback, |
| bool OptimizeFrame) |
| : CreateAndInitABI([=](Function &F, coro::Shape &S) { |
| std::unique_ptr<coro::BaseABI> ABI = |
| CreateNewABI(F, S, IsMatCallback, {}); |
| ABI->init(); |
| return ABI; |
| }), |
| OptimizeFrame(OptimizeFrame) {} |
| |
| // For backward compatibility, this constructor takes a materializable |
| // callback and creates a generator for an ABI with a modified materializable |
| // callback. |
| CoroSplitPass::CoroSplitPass( |
| std::function<bool(Instruction &)> IsMatCallback, |
| SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame) |
| : CreateAndInitABI([=](Function &F, coro::Shape &S) { |
| std::unique_ptr<coro::BaseABI> ABI = |
| CreateNewABI(F, S, IsMatCallback, GenCustomABIs); |
| ABI->init(); |
| return ABI; |
| }), |
| OptimizeFrame(OptimizeFrame) {} |
| |
| PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, |
| CGSCCAnalysisManager &AM, |
| LazyCallGraph &CG, CGSCCUpdateResult &UR) { |
| // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a |
| // non-zero number of nodes, so we assume that here and grab the first |
| // node's function's module. |
| Module &M = *C.begin()->getFunction().getParent(); |
| auto &FAM = |
| AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); |
| |
| // Check for uses of llvm.coro.prepare.retcon/async. |
| SmallVector<Function *, 2> PrepareFns; |
| addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); |
| addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); |
| |
| // Find coroutines for processing. |
| SmallVector<LazyCallGraph::Node *> Coroutines; |
| for (LazyCallGraph::Node &N : C) |
| if (N.getFunction().isPresplitCoroutine()) |
| Coroutines.push_back(&N); |
| |
| if (Coroutines.empty() && PrepareFns.empty()) |
| return PreservedAnalyses::all(); |
| |
| auto *CurrentSCC = &C; |
| // Split all the coroutines. |
| for (LazyCallGraph::Node *N : Coroutines) { |
| Function &F = N->getFunction(); |
| LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() |
| << "\n"); |
| |
| // The suspend-crossing algorithm in buildCoroutineFrame gets tripped up |
| // by unreachable blocks, so remove them as a first pass. Remove the |
| // unreachable blocks before collecting intrinsics into Shape. |
| removeUnreachableBlocks(F); |
| |
| coro::Shape Shape(F); |
| if (!Shape.CoroBegin) |
| continue; |
| |
| F.setSplittedCoroutine(); |
| |
| std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape); |
| |
| SmallVector<Function *, 4> Clones; |
| auto &TTI = FAM.getResult<TargetIRAnalysis>(F); |
| doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame); |
| CurrentSCC = &updateCallGraphAfterCoroutineSplit( |
| *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM); |
| |
| auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
| ORE.emit([&]() { |
| return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) |
| << "Split '" << ore::NV("function", F.getName()) |
| << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize) |
| << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")"; |
| }); |
| |
| if (!Shape.CoroSuspends.empty()) { |
| // Run the CGSCC pipeline on the original and newly split functions. |
| UR.CWorklist.insert(CurrentSCC); |
| for (Function *Clone : Clones) |
| UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone))); |
| } |
| } |
| |
| for (auto *PrepareFn : PrepareFns) { |
| replaceAllPrepares(PrepareFn, CG, *CurrentSCC); |
| } |
| |
| return PreservedAnalyses::none(); |
| } |