lib/Format/MacroCallReconstructor.cpp - llvm-project/clang - Git at Google

 //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// This file contains the implementation of MacroCallReconstructor, which fits
 /// an reconstructed macro call to a parsed set of UnwrappedLines.
 ///
 //===----------------------------------------------------------------------===//

 #include "Macros.h"

 #include "UnwrappedLineParser.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Debug.h"
 #include <cassert>

 #define DEBUG_TYPE "format-reconstruct"

 namespace clang {
 namespace format {

 // Call \p Call for each token in the unwrapped line given, passing
 // the token, its parent and whether it is the first token in the line.
 template <typename T>
 void forEachToken(const UnwrappedLine &Line, const T &Call,
                   FormatToken *Parent = nullptr) {
   bool First = true;
   for (const auto &N : Line.Tokens) {
     Call(N.Tok, Parent, First, Line.Level);
     First = false;
     for (const auto &Child : N.Children)
       forEachToken(Child, Call, N.Tok);
   }
 }

 MacroCallReconstructor::MacroCallReconstructor(
     unsigned Level,
     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
         &ActiveExpansions)
     : Result(Level), IdToReconstructed(ActiveExpansions) {
   Result.Tokens.push_back(std::make_unique<LineNode>());
   ActiveReconstructedLines.push_back(&Result);
 }

 void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
   assert(State != Finalized);
   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First,
                          unsigned Level) { add(Token, Parent, First, Level); });
   assert(InProgress || finished());
 }

 UnwrappedLine MacroCallReconstructor::takeResult() && {
   finalize();
   assert(Result.Tokens.size() == 1 &&
          Result.Tokens.front()->Children.size() == 1);
   UnwrappedLine Final = createUnwrappedLine(
       *Result.Tokens.front()->Children.front(), Result.Level);
   assert(!Final.Tokens.empty());
   return Final;
 }

 // Reconstruct the position of the next \p Token, given its parent \p
 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
 // is the first token in a given unwrapped line.
 void MacroCallReconstructor::add(FormatToken *Token,
                                  FormatToken *ExpandedParent, bool First,
                                  unsigned Level) {
   LLVM_DEBUG(
       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
                    << ", First: " << First << "\n");
   // In order to be able to find the correct parent in the reconstructed token
   // stream, we need to continue the last open reconstruction until we find the
   // given token if it is part of the reconstructed token stream.
   //
   // Note that hidden tokens can be part of the reconstructed stream in nested
   // macro calls.
   // For example, given
   //   #define C(x, y) x y
   //   #define B(x) {x}
   // And the call:
   //   C(a, B(b))
   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
   // found in the reconstructed token stream of that expansion level.
   // In the expanded token stream
   //   a {b}
   // 'b' is a child of '{'. We need to continue the open expansion of the ','
   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
   // so we later set the spelled token 'b' as a child of the ','.
   if (!ActiveExpansions.empty() && Token->MacroCtx &&
       (Token->MacroCtx->Role != MR_Hidden ||
        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
       First = true;
   }

   prepareParent(ExpandedParent, First, Level);

   if (Token->MacroCtx) {
     // If this token was generated by a macro call, add the reconstructed
     // equivalent of the token.
     reconstruct(Token);
   } else {
     // Otherwise, we add it to the current line.
     appendToken(Token);
   }
 }

 // Adjusts the stack of active reconstructed lines so we're ready to push
 // tokens. The tokens to be pushed are children of ExpandedParent in the
 // expanded code.
 //
 // This may entail:
 // - creating a new line, if the parent is on the active line
 // - popping active lines, if the parent is further up the stack
 //
 // Postcondition:
 // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
 // reconstructed replacement token as a parent (when possible) - that is, the
 // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
 // is the parent of ActiveReconstructedLines.back() in the reconstructed
 // unwrapped line.
 void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
                                            bool NewLine, unsigned Level) {
   LLVM_DEBUG({
     llvm::dbgs() << "ParentMap:\n";
     debugParentMap();
   });
   // We want to find the parent in the new unwrapped line, where the expanded
   // parent might have been replaced during reconstruction.
   FormatToken *Parent = getParentInResult(ExpandedParent);
   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
                           << (Parent ? Parent->TokenText : "<null>") << "\n");

   FormatToken *OpenMacroParent = nullptr;
   if (!MacroCallStructure.empty()) {
     // Inside a macro expansion, it is possible to lose track of the correct
     // parent - either because it is already popped, for example because it was
     // in a different macro argument (e.g. M({, })), or when we work on invalid
     // code.
     // Thus, we use the innermost macro call's parent as the parent at which
     // we stop; this allows us to stay within the macro expansion and keeps
     // any problems confined to the extent of the macro call.
     OpenMacroParent =
         getParentInResult(MacroCallStructure.back().MacroCallLParen);
     LLVM_DEBUG(llvm::dbgs()
                << "MacroCallLParen: "
                << MacroCallStructure.back().MacroCallLParen->TokenText
                << ", OpenMacroParent: "
                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
                << "\n");
   }
   if (NewLine ||
       (!ActiveReconstructedLines.back()->Tokens.empty() &&
        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
     // If we are at the first token in a new line, we want to also
     // create a new line in the resulting reconstructed unwrapped line.
     while (ActiveReconstructedLines.back()->Tokens.empty() ||
            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
                 OpenMacroParent)) {
       ActiveReconstructedLines.pop_back();
       assert(!ActiveReconstructedLines.empty());
     }
     assert(!ActiveReconstructedLines.empty());
     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
         std::make_unique<ReconstructedLine>(Level));
     ActiveReconstructedLines.push_back(
         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
   } else if (parentLine().Tokens.back()->Tok != Parent) {
     // If we're not the first token in a new line, pop lines until we find
     // the child of \c Parent in the stack.
     while (Parent != parentLine().Tokens.back()->Tok &&
            parentLine().Tokens.back()->Tok &&
            parentLine().Tokens.back()->Tok != OpenMacroParent) {
       ActiveReconstructedLines.pop_back();
       assert(!ActiveReconstructedLines.empty());
     }
   }
   assert(!ActiveReconstructedLines.empty());
 }

 // For a given \p Parent in the incoming expanded token stream, find the
 // corresponding parent in the output.
 FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
   if (!Mapped)
     return Parent;
   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
     Parent = Mapped;
   // If we use a different token than the parent in the expanded token stream
   // as parent, mark it as a special parent, so the formatting code knows it
   // needs to have its children formatted.
   Parent->MacroParent = true;
   return Parent;
 }

 // Reconstruct a \p Token that was expanded from a macro call.
 void MacroCallReconstructor::reconstruct(FormatToken *Token) {
   assert(Token->MacroCtx);
   // A single token can be the only result of a macro call:
   // Given: #define ID(x, y) ;
   // And the call: ID(<some>, <tokens>)
   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
   if (Token->MacroCtx->StartOfExpansion) {
     startReconstruction(Token);
     // If the order of tokens in the expanded token stream is not the
     // same as the order of tokens in the reconstructed stream, we need
     // to reconstruct tokens that arrive later in the stream.
     if (Token->MacroCtx->Role != MR_Hidden)
       reconstructActiveCallUntil(Token);
   }
   assert(!ActiveExpansions.empty());
   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
     if (Token->MacroCtx->Role != MR_Hidden) {
       // The current token in the reconstructed token stream must be the token
       // we're looking for - we either arrive here after startReconstruction,
       // which initiates the stream to the first token, or after
       // continueReconstructionUntil skipped until the expected token in the
       // reconstructed stream at the start of add(...).
       assert(ActiveExpansions.back().SpelledI->Tok == Token);
       processNextReconstructed();
     } else if (!currentLine()->Tokens.empty()) {
       // Map all hidden tokens to the last visible token in the output.
       // If the hidden token is a parent, we'll use the last visible
       // token as the parent of the hidden token's children.
       SpelledParentToReconstructedParent[Token] =
           currentLine()->Tokens.back()->Tok;
     } else {
       for (auto I = ActiveReconstructedLines.rbegin(),
                 E = ActiveReconstructedLines.rend();
            I != E; ++I) {
         if (!(*I)->Tokens.empty()) {
           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
           break;
         }
       }
     }
   }
   if (Token->MacroCtx->EndOfExpansion)
     endReconstruction(Token);
 }

 // Given a \p Token that starts an expansion, reconstruct the beginning of the
 // macro call.
 // For example, given: #define ID(x) x
 // And the call: ID(int a)
 // Reconstructs: ID(
 void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
   assert(Token->MacroCtx);
   assert(!Token->MacroCtx->ExpandedFrom.empty());
   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
 #ifndef NDEBUG
   // Check that the token's reconstruction stack matches our current
   // reconstruction stack.
   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
     assert(ActiveExpansions[I].ID ==
            Token->MacroCtx
                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
   }
 #endif
   // Start reconstruction for all calls for which this token is the first token
   // generated by the call.
   // Note that the token's expanded from stack is inside-to-outside, and the
   // expansions for which this token is not the first are the outermost ones.
   ArrayRef<FormatToken *> StartedMacros =
       ArrayRef(Token->MacroCtx->ExpandedFrom)
           .drop_back(ActiveExpansions.size());
   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
   // We reconstruct macro calls outside-to-inside.
   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
     // We found a macro call to be reconstructed; the next time our
     // reconstruction stack is empty we know we finished an reconstruction.
 #ifndef NDEBUG
     State = InProgress;
 #endif
     // Put the reconstructed macro call's token into our reconstruction stack.
     auto IU = IdToReconstructed.find(ID);
     assert(IU != IdToReconstructed.end());
     ActiveExpansions.push_back(
         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
     // Process the macro call's identifier.
     processNextReconstructed();
     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
       continue;
     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
       // Process the optional opening parenthesis.
       processNextReconstructed();
     }
   }
 }

 // Add all tokens in the reconstruction stream to the output until we find the
 // given \p Token.
 bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
   assert(!ActiveExpansions.empty());
   bool PassedMacroComma = false;
   // FIXME: If Token was already expanded earlier, due to
   // a change in order, we will not find it, but need to
   // skip it.
   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
          ActiveExpansions.back().SpelledI->Tok != Token) {
     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
   }
   return PassedMacroComma;
 }

 // End all reconstructions for which \p Token is the final token.
 void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
   assert(Token->MacroCtx &&
          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
     LLVM_DEBUG([&] {
       // Check all remaining tokens but the final closing parenthesis and
       // optional trailing comment were already reconstructed at an inner
       // expansion level.
       for (auto T = ActiveExpansions.back().SpelledI;
            T != ActiveExpansions.back().SpelledE; ++T) {
         FormatToken *Token = T->Tok;
         bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
                              std::next(T)->Tok->isTrailingComment()) &&
                             !Token->MacroCtx && Token->is(tok::r_paren);
         bool TrailingComment = Token->isTrailingComment();
         bool PreviousLevel =
             Token->MacroCtx &&
             (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
         if (!ClosingParen && !TrailingComment && !PreviousLevel)
           llvm::dbgs() << "At token: " << Token->TokenText << "\n";
         // In addition to the following cases, we can also run into this
         // when a macro call had more arguments than expected; in that case,
         // the comma and the remaining tokens in the macro call will
         // potentially end up in the line when we finish the expansion.
         // FIXME: Add the information which arguments are unused, and assert
         // one of the cases below plus reconstructed macro argument tokens.
         // assert(ClosingParen || TrailingComment || PreviousLevel);
       }
     }());
     // Handle the remaining open tokens:
     // - expand the closing parenthesis, if it exists, including an optional
     //   trailing comment
     // - handle tokens that were already reconstructed at an inner expansion
     //   level
     // - handle tokens when a macro call had more than the expected number of
     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
     //   up with the sequence ", b, c)" being open at the end of the
     //   reconstruction; we want to gracefully handle that case
     //
     // FIXME: See the above debug-check for what we will need to do to be
     // able to assert this.
     for (auto T = ActiveExpansions.back().SpelledI;
          T != ActiveExpansions.back().SpelledE; ++T) {
       processNextReconstructed();
     }
     ActiveExpansions.pop_back();
   }
 }

 void MacroCallReconstructor::debugParentMap() const {
   llvm::DenseSet<FormatToken *> Values;
   for (const auto &P : SpelledParentToReconstructedParent)
     Values.insert(P.second);

   for (const auto &P : SpelledParentToReconstructedParent) {
     if (Values.contains(P.first))
       continue;
     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
     for (auto I = SpelledParentToReconstructedParent.find(P.first),
               E = SpelledParentToReconstructedParent.end();
          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
     }
     llvm::dbgs() << "\n";
   }
 }

 // If visible, add the next token of the reconstructed token sequence to the
 // output. Returns whether reconstruction passed a comma that is part of a
 // macro call.
 bool MacroCallReconstructor::processNextReconstructed() {
   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
   ++ActiveExpansions.back().SpelledI;
   if (Token->MacroCtx) {
     // Skip tokens that are not part of the macro call.
     if (Token->MacroCtx->Role == MR_Hidden)
       return false;
     // Skip tokens we already expanded during an inner reconstruction.
     // For example, given: #define ID(x) {x}
     // And the call: ID(ID(f))
     // We get two reconstructions:
     // ID(f) -> {f}
     // ID({f}) -> {{f}}
     // We reconstruct f during the first reconstruction, and skip it during the
     // second reconstruction.
     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
       return false;
   }
   // Tokens that do not have a macro context are tokens in that are part of the
   // macro call that have not taken part in expansion.
   if (!Token->MacroCtx) {
     // Put the parentheses and commas of a macro call into the same line;
     // if the arguments produce new unwrapped lines, they will become children
     // of the corresponding opening parenthesis or comma tokens in the
     // reconstructed call.
     if (Token->is(tok::l_paren)) {
       MacroCallStructure.push_back(MacroCallState(
           currentLine(), parentLine().Tokens.back()->Tok, Token));
       // All tokens that are children of the previous line's last token in the
       // reconstructed token stream will now be children of the l_paren token.
       // For example, for the line containing the macro calls:
       //   auto x = ID({ID(2)});
       // We will build up a map <null> -> ( -> ( with the first and second
       // l_paren of the macro call respectively. New lines that come in with a
       // <null> parent will then become children of the l_paren token of the
       // currently innermost macro call.
       SpelledParentToReconstructedParent[MacroCallStructure.back()
                                              .ParentLastToken] = Token;
       appendToken(Token);
       prepareParent(Token, /*NewLine=*/true,
                     MacroCallStructure.back().Line->Level);
       Token->MacroParent = true;
       return false;
     }
     if (!MacroCallStructure.empty()) {
       if (Token->is(tok::comma)) {
         // Make new lines inside the next argument children of the comma token.
         SpelledParentToReconstructedParent
             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
         Token->MacroParent = true;
         appendToken(Token, MacroCallStructure.back().Line);
         prepareParent(Token, /*NewLine=*/true,
                       MacroCallStructure.back().Line->Level);
         return true;
       }
       if (Token->is(tok::r_paren)) {
         appendToken(Token, MacroCallStructure.back().Line);
         SpelledParentToReconstructedParent.erase(
             MacroCallStructure.back().ParentLastToken);
         MacroCallStructure.pop_back();
         return false;
       }
     }
   }
   // Note that any tokens that are tagged with MR_None have been passed as
   // arguments to the macro that have not been expanded, for example:
   // Given: #define ID(X) x
   // When calling: ID(a, b)
   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
   // Given that erroring out in this case would be disruptive, we continue
   // pushing the (unformatted) token.
   // FIXME: This can lead to unfortunate formatting decisions - give the user
   // a hint that their macro definition is broken.
   appendToken(Token);
   return false;
 }

 void MacroCallReconstructor::finalize() {
 #ifndef NDEBUG
   assert(State != Finalized && finished());
   State = Finalized;
 #endif

   // We created corresponding unwrapped lines for each incoming line as children
   // the the toplevel null token.
   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
   LLVM_DEBUG({
     llvm::dbgs() << "Finalizing reconstructed lines:\n";
     debug(Result, 0);
   });

   // The first line becomes the top level line in the resulting unwrapped line.
   LineNode &Top = *Result.Tokens.front();
   auto *I = Top.Children.begin();
   // Every subsequent line will become a child of the last token in the previous
   // line, which is the token prior to the first token in the line.
   LineNode *Last = (*I)->Tokens.back().get();
   ++I;
   for (auto *E = Top.Children.end(); I != E; ++I) {
     assert(Last->Children.empty());
     Last->Children.push_back(std::move(*I));

     // Mark the previous line's last token as generated by a macro expansion
     // so the formatting algorithm can take that into account.
     Last->Tok->MacroParent = true;

     Last = Last->Children.back()->Tokens.back().get();
   }
   Top.Children.resize(1);
 }

 void MacroCallReconstructor::appendToken(FormatToken *Token,
                                          ReconstructedLine *L) {
   L = L ? L : currentLine();
   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
   L->Tokens.push_back(std::make_unique<LineNode>(Token));
 }

 UnwrappedLine
 MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
                                             int Level) {
   UnwrappedLine Result;
   Result.Level = Level;
   for (const auto &N : Line.Tokens) {
     Result.Tokens.push_back(N->Tok);
     UnwrappedLineNode &Current = Result.Tokens.back();
     auto NumChildren =
         std::count_if(N->Children.begin(), N->Children.end(),
                       [](const auto &Child) { return !Child->Tokens.empty(); });
     if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
       // If we only have one child, and the child is due to a macro expansion
       // (either attached to a left parenthesis or comma), merge the child into
       // the current line to prevent forced breaks for macro arguments.
       auto *Child = std::find_if(
           N->Children.begin(), N->Children.end(),
           [](const auto &Child) { return !Child->Tokens.empty(); });
       auto Line = createUnwrappedLine(**Child, Level);
       Result.Tokens.splice(Result.Tokens.end(), Line.Tokens);
     } else if (NumChildren > 0) {
       // When there are multiple children with different indent, make sure that
       // we indent them:
       // 1. One level below the current line's level.
       // 2. At the correct level relative to each other.
       unsigned MinChildLevel =
           std::min_element(N->Children.begin(), N->Children.end(),
                            [](const auto &E1, const auto &E2) {
                              return E1->Level < E2->Level;
                            })
               ->get()
               ->Level;
       for (const auto &Child : N->Children) {
         if (Child->Tokens.empty())
           continue;
         Current.Children.push_back(createUnwrappedLine(
             *Child, Level + 1 + (Child->Level - MinChildLevel)));
       }
     }
   }
   return Result;
 }

 void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
   for (int i = 0; i < Level; ++i)
     llvm::dbgs() << " ";
   for (const auto &N : Line.Tokens) {
     if (!N)
       continue;
     if (N->Tok)
       llvm::dbgs() << N->Tok->TokenText << " ";
     for (const auto &Child : N->Children) {
       llvm::dbgs() << "\n";
       debug(*Child, Level + 1);
       for (int i = 0; i < Level; ++i)
         llvm::dbgs() << " ";
     }
   }
   llvm::dbgs() << "\n";
 }

 MacroCallReconstructor::ReconstructedLine &
 MacroCallReconstructor::parentLine() {
   return **std::prev(std::prev(ActiveReconstructedLines.end()));
 }

 MacroCallReconstructor::ReconstructedLine *
 MacroCallReconstructor::currentLine() {
   return ActiveReconstructedLines.back();
 }

 MacroCallReconstructor::MacroCallState::MacroCallState(
     MacroCallReconstructor::ReconstructedLine *Line,
     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
     : Line(Line), ParentLastToken(ParentLastToken),
       MacroCallLParen(MacroCallLParen) {
   LLVM_DEBUG(
       llvm::dbgs() << "ParentLastToken: "
                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
                    << "\n");

   assert(MacroCallLParen->is(tok::l_paren));
 }

 } // namespace format
 } // namespace clang