| //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief This file contains the implementation of the UnwrappedLineParser, |
| /// which turns a stream of tokens into UnwrappedLines. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "UnwrappedLineParser.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <algorithm> |
| |
| #define DEBUG_TYPE "format-parser" |
| |
| namespace clang { |
| namespace format { |
| |
| class FormatTokenSource { |
| public: |
| virtual ~FormatTokenSource() {} |
| virtual FormatToken *getNextToken() = 0; |
| |
| virtual unsigned getPosition() = 0; |
| virtual FormatToken *setPosition(unsigned Position) = 0; |
| }; |
| |
| namespace { |
| |
| class ScopedDeclarationState { |
| public: |
| ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, |
| bool MustBeDeclaration) |
| : Line(Line), Stack(Stack) { |
| Line.MustBeDeclaration = MustBeDeclaration; |
| Stack.push_back(MustBeDeclaration); |
| } |
| ~ScopedDeclarationState() { |
| Stack.pop_back(); |
| if (!Stack.empty()) |
| Line.MustBeDeclaration = Stack.back(); |
| else |
| Line.MustBeDeclaration = true; |
| } |
| |
| private: |
| UnwrappedLine &Line; |
| std::vector<bool> &Stack; |
| }; |
| |
| static bool isLineComment(const FormatToken &FormatTok) { |
| return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); |
| } |
| |
| // Checks if \p FormatTok is a line comment that continues the line comment |
| // \p Previous. The original column of \p MinColumnToken is used to determine |
| // whether \p FormatTok is indented enough to the right to continue \p Previous. |
| static bool continuesLineComment(const FormatToken &FormatTok, |
| const FormatToken *Previous, |
| const FormatToken *MinColumnToken) { |
| if (!Previous || !MinColumnToken) |
| return false; |
| unsigned MinContinueColumn = |
| MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); |
| return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && |
| isLineComment(*Previous) && |
| FormatTok.OriginalColumn >= MinContinueColumn; |
| } |
| |
| class ScopedMacroState : public FormatTokenSource { |
| public: |
| ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, |
| FormatToken *&ResetToken) |
| : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), |
| PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), |
| Token(nullptr), PreviousToken(nullptr) { |
| TokenSource = this; |
| Line.Level = 0; |
| Line.InPPDirective = true; |
| } |
| |
| ~ScopedMacroState() override { |
| TokenSource = PreviousTokenSource; |
| ResetToken = Token; |
| Line.InPPDirective = false; |
| Line.Level = PreviousLineLevel; |
| } |
| |
| FormatToken *getNextToken() override { |
| // The \c UnwrappedLineParser guards against this by never calling |
| // \c getNextToken() after it has encountered the first eof token. |
| assert(!eof()); |
| PreviousToken = Token; |
| Token = PreviousTokenSource->getNextToken(); |
| if (eof()) |
| return getFakeEOF(); |
| return Token; |
| } |
| |
| unsigned getPosition() override { return PreviousTokenSource->getPosition(); } |
| |
| FormatToken *setPosition(unsigned Position) override { |
| PreviousToken = nullptr; |
| Token = PreviousTokenSource->setPosition(Position); |
| return Token; |
| } |
| |
| private: |
| bool eof() { |
| return Token && Token->HasUnescapedNewline && |
| !continuesLineComment(*Token, PreviousToken, |
| /*MinColumnToken=*/PreviousToken); |
| } |
| |
| FormatToken *getFakeEOF() { |
| static bool EOFInitialized = false; |
| static FormatToken FormatTok; |
| if (!EOFInitialized) { |
| FormatTok.Tok.startToken(); |
| FormatTok.Tok.setKind(tok::eof); |
| EOFInitialized = true; |
| } |
| return &FormatTok; |
| } |
| |
| UnwrappedLine &Line; |
| FormatTokenSource *&TokenSource; |
| FormatToken *&ResetToken; |
| unsigned PreviousLineLevel; |
| FormatTokenSource *PreviousTokenSource; |
| |
| FormatToken *Token; |
| FormatToken *PreviousToken; |
| }; |
| |
| } // end anonymous namespace |
| |
| class ScopedLineState { |
| public: |
| ScopedLineState(UnwrappedLineParser &Parser, |
| bool SwitchToPreprocessorLines = false) |
| : Parser(Parser), OriginalLines(Parser.CurrentLines) { |
| if (SwitchToPreprocessorLines) |
| Parser.CurrentLines = &Parser.PreprocessorDirectives; |
| else if (!Parser.Line->Tokens.empty()) |
| Parser.CurrentLines = &Parser.Line->Tokens.back().Children; |
| PreBlockLine = std::move(Parser.Line); |
| Parser.Line = llvm::make_unique<UnwrappedLine>(); |
| Parser.Line->Level = PreBlockLine->Level; |
| Parser.Line->InPPDirective = PreBlockLine->InPPDirective; |
| } |
| |
| ~ScopedLineState() { |
| if (!Parser.Line->Tokens.empty()) { |
| Parser.addUnwrappedLine(); |
| } |
| assert(Parser.Line->Tokens.empty()); |
| Parser.Line = std::move(PreBlockLine); |
| if (Parser.CurrentLines == &Parser.PreprocessorDirectives) |
| Parser.MustBreakBeforeNextToken = true; |
| Parser.CurrentLines = OriginalLines; |
| } |
| |
| private: |
| UnwrappedLineParser &Parser; |
| |
| std::unique_ptr<UnwrappedLine> PreBlockLine; |
| SmallVectorImpl<UnwrappedLine> *OriginalLines; |
| }; |
| |
| class CompoundStatementIndenter { |
| public: |
| CompoundStatementIndenter(UnwrappedLineParser *Parser, |
| const FormatStyle &Style, unsigned &LineLevel) |
| : LineLevel(LineLevel), OldLineLevel(LineLevel) { |
| if (Style.BraceWrapping.AfterControlStatement) |
| Parser->addUnwrappedLine(); |
| if (Style.BraceWrapping.IndentBraces) |
| ++LineLevel; |
| } |
| ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } |
| |
| private: |
| unsigned &LineLevel; |
| unsigned OldLineLevel; |
| }; |
| |
| namespace { |
| |
| class IndexedTokenSource : public FormatTokenSource { |
| public: |
| IndexedTokenSource(ArrayRef<FormatToken *> Tokens) |
| : Tokens(Tokens), Position(-1) {} |
| |
| FormatToken *getNextToken() override { |
| ++Position; |
| return Tokens[Position]; |
| } |
| |
| unsigned getPosition() override { |
| assert(Position >= 0); |
| return Position; |
| } |
| |
| FormatToken *setPosition(unsigned P) override { |
| Position = P; |
| return Tokens[Position]; |
| } |
| |
| void reset() { Position = -1; } |
| |
| private: |
| ArrayRef<FormatToken *> Tokens; |
| int Position; |
| }; |
| |
| } // end anonymous namespace |
| |
| UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, |
| const AdditionalKeywords &Keywords, |
| unsigned FirstStartColumn, |
| ArrayRef<FormatToken *> Tokens, |
| UnwrappedLineConsumer &Callback) |
| : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), |
| CurrentLines(&Lines), Style(Style), Keywords(Keywords), |
| CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), |
| Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), |
| IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None |
| ? IG_Rejected |
| : IG_Inited), |
| IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} |
| |
| void UnwrappedLineParser::reset() { |
| PPBranchLevel = -1; |
| IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None |
| ? IG_Rejected |
| : IG_Inited; |
| IncludeGuardToken = nullptr; |
| Line.reset(new UnwrappedLine); |
| CommentsBeforeNextToken.clear(); |
| FormatTok = nullptr; |
| MustBreakBeforeNextToken = false; |
| PreprocessorDirectives.clear(); |
| CurrentLines = &Lines; |
| DeclarationScopeStack.clear(); |
| PPStack.clear(); |
| Line->FirstStartColumn = FirstStartColumn; |
| } |
| |
| void UnwrappedLineParser::parse() { |
| IndexedTokenSource TokenSource(AllTokens); |
| Line->FirstStartColumn = FirstStartColumn; |
| do { |
| DEBUG(llvm::dbgs() << "----\n"); |
| reset(); |
| Tokens = &TokenSource; |
| TokenSource.reset(); |
| |
| readToken(); |
| parseFile(); |
| |
| // If we found an include guard then all preprocessor directives (other than |
| // the guard) are over-indented by one. |
| if (IncludeGuard == IG_Found) |
| for (auto &Line : Lines) |
| if (Line.InPPDirective && Line.Level > 0) |
| --Line.Level; |
| |
| // Create line with eof token. |
| pushToken(FormatTok); |
| addUnwrappedLine(); |
| |
| for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), |
| E = Lines.end(); |
| I != E; ++I) { |
| Callback.consumeUnwrappedLine(*I); |
| } |
| Callback.finishRun(); |
| Lines.clear(); |
| while (!PPLevelBranchIndex.empty() && |
| PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { |
| PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); |
| PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); |
| } |
| if (!PPLevelBranchIndex.empty()) { |
| ++PPLevelBranchIndex.back(); |
| assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); |
| assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); |
| } |
| } while (!PPLevelBranchIndex.empty()); |
| } |
| |
| void UnwrappedLineParser::parseFile() { |
| // The top-level context in a file always has declarations, except for pre- |
| // processor directives and JavaScript files. |
| bool MustBeDeclaration = |
| !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; |
| ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, |
| MustBeDeclaration); |
| if (Style.Language == FormatStyle::LK_TextProto) |
| parseBracedList(); |
| else |
| parseLevel(/*HasOpeningBrace=*/false); |
| // Make sure to format the remaining tokens. |
| flushComments(true); |
| addUnwrappedLine(); |
| } |
| |
| void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { |
| bool SwitchLabelEncountered = false; |
| do { |
| tok::TokenKind kind = FormatTok->Tok.getKind(); |
| if (FormatTok->Type == TT_MacroBlockBegin) { |
| kind = tok::l_brace; |
| } else if (FormatTok->Type == TT_MacroBlockEnd) { |
| kind = tok::r_brace; |
| } |
| |
| switch (kind) { |
| case tok::comment: |
| nextToken(); |
| addUnwrappedLine(); |
| break; |
| case tok::l_brace: |
| // FIXME: Add parameter whether this can happen - if this happens, we must |
| // be in a non-declaration context. |
| if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) |
| continue; |
| parseBlock(/*MustBeDeclaration=*/false); |
| addUnwrappedLine(); |
| break; |
| case tok::r_brace: |
| if (HasOpeningBrace) |
| return; |
| nextToken(); |
| addUnwrappedLine(); |
| break; |
| case tok::kw_default: |
| case tok::kw_case: |
| if (Style.Language == FormatStyle::LK_JavaScript && |
| Line->MustBeDeclaration) { |
| // A 'case: string' style field declaration. |
| parseStructuralElement(); |
| break; |
| } |
| if (!SwitchLabelEncountered && |
| (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) |
| ++Line->Level; |
| SwitchLabelEncountered = true; |
| parseStructuralElement(); |
| break; |
| default: |
| parseStructuralElement(); |
| break; |
| } |
| } while (!eof()); |
| } |
| |
| void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { |
| // We'll parse forward through the tokens until we hit |
| // a closing brace or eof - note that getNextToken() will |
| // parse macros, so this will magically work inside macro |
| // definitions, too. |
| unsigned StoredPosition = Tokens->getPosition(); |
| FormatToken *Tok = FormatTok; |
| const FormatToken *PrevTok = Tok->Previous; |
| // Keep a stack of positions of lbrace tokens. We will |
| // update information about whether an lbrace starts a |
| // braced init list or a different block during the loop. |
| SmallVector<FormatToken *, 8> LBraceStack; |
| assert(Tok->Tok.is(tok::l_brace)); |
| do { |
| // Get next non-comment token. |
| FormatToken *NextTok; |
| unsigned ReadTokens = 0; |
| do { |
| NextTok = Tokens->getNextToken(); |
| ++ReadTokens; |
| } while (NextTok->is(tok::comment)); |
| |
| switch (Tok->Tok.getKind()) { |
| case tok::l_brace: |
| if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { |
| if (PrevTok->isOneOf(tok::colon, tok::less)) |
| // A ':' indicates this code is in a type, or a braced list |
| // following a label in an object literal ({a: {b: 1}}). |
| // A '<' could be an object used in a comparison, but that is nonsense |
| // code (can never return true), so more likely it is a generic type |
| // argument (`X<{a: string; b: number}>`). |
| // The code below could be confused by semicolons between the |
| // individual members in a type member list, which would normally |
| // trigger BK_Block. In both cases, this must be parsed as an inline |
| // braced init. |
| Tok->BlockKind = BK_BracedInit; |
| else if (PrevTok->is(tok::r_paren)) |
| // `) { }` can only occur in function or method declarations in JS. |
| Tok->BlockKind = BK_Block; |
| } else { |
| Tok->BlockKind = BK_Unknown; |
| } |
| LBraceStack.push_back(Tok); |
| break; |
| case tok::r_brace: |
| if (LBraceStack.empty()) |
| break; |
| if (LBraceStack.back()->BlockKind == BK_Unknown) { |
| bool ProbablyBracedList = false; |
| if (Style.Language == FormatStyle::LK_Proto) { |
| ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); |
| } else { |
| // Using OriginalColumn to distinguish between ObjC methods and |
| // binary operators is a bit hacky. |
| bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && |
| NextTok->OriginalColumn == 0; |
| |
| // If there is a comma, semicolon or right paren after the closing |
| // brace, we assume this is a braced initializer list. Note that |
| // regardless how we mark inner braces here, we will overwrite the |
| // BlockKind later if we parse a braced list (where all blocks |
| // inside are by default braced lists), or when we explicitly detect |
| // blocks (for example while parsing lambdas). |
| // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a |
| // braced list in JS. |
| ProbablyBracedList = |
| (Style.Language == FormatStyle::LK_JavaScript && |
| NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, |
| Keywords.kw_as)) || |
| (Style.isCpp() && NextTok->is(tok::l_paren)) || |
| NextTok->isOneOf(tok::comma, tok::period, tok::colon, |
| tok::r_paren, tok::r_square, tok::l_brace, |
| tok::l_square, tok::ellipsis) || |
| (NextTok->is(tok::identifier) && |
| !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || |
| (NextTok->is(tok::semi) && |
| (!ExpectClassBody || LBraceStack.size() != 1)) || |
| (NextTok->isBinaryOperator() && !NextIsObjCMethod); |
| } |
| if (ProbablyBracedList) { |
| Tok->BlockKind = BK_BracedInit; |
| LBraceStack.back()->BlockKind = BK_BracedInit; |
| } else { |
| Tok->BlockKind = BK_Block; |
| LBraceStack.back()->BlockKind = BK_Block; |
| } |
| } |
| LBraceStack.pop_back(); |
| break; |
| case tok::at: |
| case tok::semi: |
| case tok::kw_if: |
| case tok::kw_while: |
| case tok::kw_for: |
| case tok::kw_switch: |
| case tok::kw_try: |
| case tok::kw___try: |
| if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) |
| LBraceStack.back()->BlockKind = BK_Block; |
| break; |
| default: |
| break; |
| } |
| PrevTok = Tok; |
| Tok = NextTok; |
| } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); |
| |
| // Assume other blocks for all unclosed opening braces. |
| for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { |
| if (LBraceStack[i]->BlockKind == BK_Unknown) |
| LBraceStack[i]->BlockKind = BK_Block; |
| } |
| |
| FormatTok = Tokens->setPosition(StoredPosition); |
| } |
| |
/// Folds the \c std::hash of \p v into \p seed, additionally mixing in a
/// constant and shifted copies of the previous seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
| |
| size_t UnwrappedLineParser::computePPHash() const { |
| size_t h = 0; |
| for (const auto &i : PPStack) { |
| hash_combine(h, size_t(i.Kind)); |
| hash_combine(h, i.Line); |
| } |
| return h; |
| } |
| |
| void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, |
| bool MunchSemi) { |
| assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && |
| "'{' or macro block token expected"); |
| const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); |
| FormatTok->BlockKind = BK_Block; |
| |
| size_t PPStartHash = computePPHash(); |
| |
| unsigned InitialLevel = Line->Level; |
| nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); |
| |
| if (MacroBlock && FormatTok->is(tok::l_paren)) |
| parseParens(); |
| |
| size_t NbPreprocessorDirectives = |
| CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; |
| addUnwrappedLine(); |
| size_t OpeningLineIndex = |
| CurrentLines->empty() |
| ? (UnwrappedLine::kInvalidIndex) |
| : (CurrentLines->size() - 1 - NbPreprocessorDirectives); |
| |
| ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, |
| MustBeDeclaration); |
| if (AddLevel) |
| ++Line->Level; |
| parseLevel(/*HasOpeningBrace=*/true); |
| |
| if (eof()) |
| return; |
| |
| if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) |
| : !FormatTok->is(tok::r_brace)) { |
| Line->Level = InitialLevel; |
| FormatTok->BlockKind = BK_Block; |
| return; |
| } |
| |
| size_t PPEndHash = computePPHash(); |
| |
| // Munch the closing brace. |
| nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); |
| |
| if (MacroBlock && FormatTok->is(tok::l_paren)) |
| parseParens(); |
| |
| if (MunchSemi && FormatTok->Tok.is(tok::semi)) |
| nextToken(); |
| Line->Level = InitialLevel; |
| |
| if (PPStartHash == PPEndHash) { |
| Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; |
| if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { |
| // Update the opening line to add the forward reference as well |
| (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = |
| CurrentLines->size() - 1; |
| } |
| } |
| } |
| |
| static bool isGoogScope(const UnwrappedLine &Line) { |
| // FIXME: Closure-library specific stuff should not be hard-coded but be |
| // configurable. |
| if (Line.Tokens.size() < 4) |
| return false; |
| auto I = Line.Tokens.begin(); |
| if (I->Tok->TokenText != "goog") |
| return false; |
| ++I; |
| if (I->Tok->isNot(tok::period)) |
| return false; |
| ++I; |
| if (I->Tok->TokenText != "scope") |
| return false; |
| ++I; |
| return I->Tok->is(tok::l_paren); |
| } |
| |
| static bool isIIFE(const UnwrappedLine &Line, |
| const AdditionalKeywords &Keywords) { |
| // Look for the start of an immediately invoked anonymous function. |
| // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression |
| // This is commonly done in JavaScript to create a new, anonymous scope. |
| // Example: (function() { ... })() |
| if (Line.Tokens.size() < 3) |
| return false; |
| auto I = Line.Tokens.begin(); |
| if (I->Tok->isNot(tok::l_paren)) |
| return false; |
| ++I; |
| if (I->Tok->isNot(Keywords.kw_function)) |
| return false; |
| ++I; |
| return I->Tok->is(tok::l_paren); |
| } |
| |
| static bool ShouldBreakBeforeBrace(const FormatStyle &Style, |
| const FormatToken &InitialToken) { |
| if (InitialToken.is(tok::kw_namespace)) |
| return Style.BraceWrapping.AfterNamespace; |
| if (InitialToken.is(tok::kw_class)) |
| return Style.BraceWrapping.AfterClass; |
| if (InitialToken.is(tok::kw_union)) |
| return Style.BraceWrapping.AfterUnion; |
| if (InitialToken.is(tok::kw_struct)) |
| return Style.BraceWrapping.AfterStruct; |
| return false; |
| } |
| |
| void UnwrappedLineParser::parseChildBlock() { |
| FormatTok->BlockKind = BK_Block; |
| nextToken(); |
| { |
| bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && |
| (isGoogScope(*Line) || isIIFE(*Line, Keywords))); |
| ScopedLineState LineState(*this); |
| ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, |
| /*MustBeDeclaration=*/false); |
| Line->Level += SkipIndent ? 0 : 1; |
| parseLevel(/*HasOpeningBrace=*/true); |
| flushComments(isOnNewLine(*FormatTok)); |
| Line->Level -= SkipIndent ? 0 : 1; |
| } |
| nextToken(); |
| } |
| |
| void UnwrappedLineParser::parsePPDirective() { |
| assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); |
| ScopedMacroState MacroState(*Line, Tokens, FormatTok); |
| nextToken(); |
| |
| if (!FormatTok->Tok.getIdentifierInfo()) { |
| parsePPUnknown(); |
| return; |
| } |
| |
| switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { |
| case tok::pp_define: |
| parsePPDefine(); |
| return; |
| case tok::pp_if: |
| parsePPIf(/*IfDef=*/false); |
| break; |
| case tok::pp_ifdef: |
| case tok::pp_ifndef: |
| parsePPIf(/*IfDef=*/true); |
| break; |
| case tok::pp_else: |
| parsePPElse(); |
| break; |
| case tok::pp_elif: |
| parsePPElIf(); |
| break; |
| case tok::pp_endif: |
| parsePPEndIf(); |
| break; |
| default: |
| parsePPUnknown(); |
| break; |
| } |
| } |
| |
| void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { |
| size_t Line = CurrentLines->size(); |
| if (CurrentLines == &PreprocessorDirectives) |
| Line += Lines.size(); |
| |
| if (Unreachable || |
| (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) |
| PPStack.push_back({PP_Unreachable, Line}); |
| else |
| PPStack.push_back({PP_Conditional, Line}); |
| } |
| |
| void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { |
| ++PPBranchLevel; |
| assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); |
| if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { |
| PPLevelBranchIndex.push_back(0); |
| PPLevelBranchCount.push_back(0); |
| } |
| PPChainBranchIndex.push(0); |
| bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; |
| conditionalCompilationCondition(Unreachable || Skip); |
| } |
| |
| void UnwrappedLineParser::conditionalCompilationAlternative() { |
| if (!PPStack.empty()) |
| PPStack.pop_back(); |
| assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); |
| if (!PPChainBranchIndex.empty()) |
| ++PPChainBranchIndex.top(); |
| conditionalCompilationCondition( |
| PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && |
| PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); |
| } |
| |
| void UnwrappedLineParser::conditionalCompilationEnd() { |
| assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); |
| if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { |
| if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { |
| PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; |
| } |
| } |
| // Guard against #endif's without #if. |
| if (PPBranchLevel > -1) |
| --PPBranchLevel; |
| if (!PPChainBranchIndex.empty()) |
| PPChainBranchIndex.pop(); |
| if (!PPStack.empty()) |
| PPStack.pop_back(); |
| } |
| |
| void UnwrappedLineParser::parsePPIf(bool IfDef) { |
| bool IfNDef = FormatTok->is(tok::pp_ifndef); |
| nextToken(); |
| bool Unreachable = false; |
| if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) |
| Unreachable = true; |
| if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") |
| Unreachable = true; |
| conditionalCompilationStart(Unreachable); |
| FormatToken *IfCondition = FormatTok; |
| // If there's a #ifndef on the first line, and the only lines before it are |
| // comments, it could be an include guard. |
| bool MaybeIncludeGuard = IfNDef; |
| if (IncludeGuard == IG_Inited && MaybeIncludeGuard) |
| for (auto &Line : Lines) { |
| if (!Line.Tokens.front().Tok->is(tok::comment)) { |
| MaybeIncludeGuard = false; |
| IncludeGuard = IG_Rejected; |
| break; |
| } |
| } |
| --PPBranchLevel; |
| parsePPUnknown(); |
| ++PPBranchLevel; |
| if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { |
| IncludeGuard = IG_IfNdefed; |
| IncludeGuardToken = IfCondition; |
| } |
| } |
| |
| void UnwrappedLineParser::parsePPElse() { |
| // If a potential include guard has an #else, it's not an include guard. |
| if (IncludeGuard == IG_Defined && PPBranchLevel == 0) |
| IncludeGuard = IG_Rejected; |
| conditionalCompilationAlternative(); |
| if (PPBranchLevel > -1) |
| --PPBranchLevel; |
| parsePPUnknown(); |
| ++PPBranchLevel; |
| } |
| |
| void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } |
| |
| void UnwrappedLineParser::parsePPEndIf() { |
| conditionalCompilationEnd(); |
| parsePPUnknown(); |
| // If the #endif of a potential include guard is the last thing in the file, |
| // then we found an include guard. |
| unsigned TokenPosition = Tokens->getPosition(); |
| FormatToken *PeekNext = AllTokens[TokenPosition]; |
| if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && |
| PeekNext->is(tok::eof) && |
| Style.IndentPPDirectives != FormatStyle::PPDIS_None) |
| IncludeGuard = IG_Found; |
| } |
| |
| void UnwrappedLineParser::parsePPDefine() { |
| nextToken(); |
| |
| if (FormatTok->Tok.getKind() != tok::identifier) { |
| IncludeGuard = IG_Rejected; |
| IncludeGuardToken = nullptr; |
| parsePPUnknown(); |
| return; |
| } |
| |
| if (IncludeGuard == IG_IfNdefed && |
| IncludeGuardToken->TokenText == FormatTok->TokenText) { |
| IncludeGuard = IG_Defined; |
| IncludeGuardToken = nullptr; |
| for (auto &Line : Lines) { |
| if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { |
| IncludeGuard = IG_Rejected; |
| break; |
| } |
| } |
| } |
| |
| nextToken(); |
| if (FormatTok->Tok.getKind() == tok::l_paren && |
| FormatTok->WhitespaceRange.getBegin() == |
| FormatTok->WhitespaceRange.getEnd()) { |
| parseParens(); |
| } |
| if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) |
| Line->Level += PPBranchLevel + 1; |
| addUnwrappedLine(); |
| ++Line->Level; |
| |
| // Errors during a preprocessor directive can only affect the layout of the |
| // preprocessor directive, and thus we ignore them. An alternative approach |
| // would be to use the same approach we use on the file level (no |
| // re-indentation if there was a structural error) within the macro |
| // definition. |
| parseFile(); |
| } |
| |
| void UnwrappedLineParser::parsePPUnknown() { |
| do { |
| nextToken(); |
| } while (!eof()); |
| if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) |
| Line->Level += PPBranchLevel + 1; |
| addUnwrappedLine(); |
| } |
| |
| // Here we blacklist certain tokens that are not usually the first token in an |
| // unwrapped line. This is used in attempt to distinguish macro calls without |
| // trailing semicolons from other constructs split to several lines. |
| static bool tokenCanStartNewLine(const clang::Token &Tok) { |
| // Semicolon can be a null-statement, l_square can be a start of a macro or |
| // a C++11 attribute, but this doesn't seem to be common. |
| return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && |
| Tok.isNot(tok::l_square) && |
| // Tokens that can only be used as binary operators and a part of |
| // overloaded operator names. |
| Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && |
| Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && |
| Tok.isNot(tok::less) && Tok.isNot(tok::greater) && |
| Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && |
| Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && |
| Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && |
| Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && |
| Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && |
| Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && |
| Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && |
| Tok.isNot(tok::lesslessequal) && |
| // Colon is used in labels, base class lists, initializer lists, |
| // range-based for loops, ternary operator, but should never be the |
| // first token in an unwrapped line. |
| Tok.isNot(tok::colon) && |
| // 'noexcept' is a trailing annotation. |
| Tok.isNot(tok::kw_noexcept); |
| } |
| |
| static bool mustBeJSIdent(const AdditionalKeywords &Keywords, |
| const FormatToken *FormatTok) { |
| // FIXME: This returns true for C/C++ keywords like 'struct'. |
| return FormatTok->is(tok::identifier) && |
| (FormatTok->Tok.getIdentifierInfo() == nullptr || |
| !FormatTok->isOneOf( |
| Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, |
| Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, |
| Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, |
| Keywords.kw_let, Keywords.kw_var, tok::kw_const, |
| Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, |
| Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, |
| Keywords.kw_from)); |
| } |
| |
| static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, |
| const FormatToken *FormatTok) { |
| return FormatTok->Tok.isLiteral() || |
| FormatTok->isOneOf(tok::kw_true, tok::kw_false) || |
| mustBeJSIdent(Keywords, FormatTok); |
| } |
| |
| // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement |
| // when encountered after a value (see mustBeJSIdentOrValue). |
| static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, |
| const FormatToken *FormatTok) { |
| return FormatTok->isOneOf( |
| tok::kw_return, Keywords.kw_yield, |
| // conditionals |
| tok::kw_if, tok::kw_else, |
| // loops |
| tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, |
| // switch/case |
| tok::kw_switch, tok::kw_case, |
| // exceptions |
| tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, |
| // declaration |
| tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, |
| Keywords.kw_async, Keywords.kw_function, |
| // import/export |
| Keywords.kw_import, tok::kw_export); |
| } |
| |
| // readTokenWithJavaScriptASI reads the next token and terminates the current |
| // line if JavaScript Automatic Semicolon Insertion must |
| // happen between the current token and the next token. |
| // |
| // This method is conservative - it cannot cover all edge cases of JavaScript, |
| // but only aims to correctly handle certain well known cases. It *must not* |
| // return true in speculative cases. |
| void UnwrappedLineParser::readTokenWithJavaScriptASI() { |
| FormatToken *Previous = FormatTok; |
| readToken(); |
| FormatToken *Next = FormatTok; |
| |
| bool IsOnSameLine = |
| CommentsBeforeNextToken.empty() |
| ? Next->NewlinesBefore == 0 |
| : CommentsBeforeNextToken.front()->NewlinesBefore == 0; |
| if (IsOnSameLine) |
| return; |
| |
| bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); |
| bool PreviousStartsTemplateExpr = |
| Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); |
| if (PreviousMustBeValue || Previous->is(tok::r_paren)) { |
| // If the line contains an '@' sign, the previous token might be an |
| // annotation, which can precede another identifier/value. |
| bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), |
| [](UnwrappedLineNode &LineNode) { |
| return LineNode.Tok->is(tok::at); |
| }) != Line->Tokens.end(); |
| if (HasAt) |
| return; |
| } |
| if (Next->is(tok::exclaim) && PreviousMustBeValue) |
| return addUnwrappedLine(); |
| bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); |
| bool NextEndsTemplateExpr = |
| Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); |
| if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && |
| (PreviousMustBeValue || |
| Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, |
| tok::minusminus))) |
| return addUnwrappedLine(); |
| if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && |
| isJSDeclOrStmt(Keywords, Next)) |
| return addUnwrappedLine(); |
| } |
| |
| void UnwrappedLineParser::parseStructuralElement() { |
| assert(!FormatTok->is(tok::l_brace)); |
| if (Style.Language == FormatStyle::LK_TableGen && |
| FormatTok->is(tok::pp_include)) { |
| nextToken(); |
| if (FormatTok->is(tok::string_literal)) |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| } |
| switch (FormatTok->Tok.getKind()) { |
| case tok::at: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| nextToken(); |
| parseBracedList(); |
| break; |
| } |
| switch (FormatTok->Tok.getObjCKeywordID()) { |
| case tok::objc_public: |
| case tok::objc_protected: |
| case tok::objc_package: |
| case tok::objc_private: |
| return parseAccessSpecifier(); |
| case tok::objc_interface: |
| case tok::objc_implementation: |
| return parseObjCInterfaceOrImplementation(); |
| case tok::objc_protocol: |
| return parseObjCProtocol(); |
| case tok::objc_end: |
| return; // Handled by the caller. |
| case tok::objc_optional: |
| case tok::objc_required: |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| case tok::objc_autoreleasepool: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| if (Style.BraceWrapping.AfterObjCDeclaration) |
| addUnwrappedLine(); |
| parseBlock(/*MustBeDeclaration=*/false); |
| } |
| addUnwrappedLine(); |
| return; |
| case tok::objc_try: |
| // This branch isn't strictly necessary (the kw_try case below would |
| // do this too after the tok::at is parsed above). But be explicit. |
| parseTryCatch(); |
| return; |
| default: |
| break; |
| } |
| break; |
| case tok::kw_asm: |
| nextToken(); |
| if (FormatTok->is(tok::l_brace)) { |
| FormatTok->Type = TT_InlineASMBrace; |
| nextToken(); |
| while (FormatTok && FormatTok->isNot(tok::eof)) { |
| if (FormatTok->is(tok::r_brace)) { |
| FormatTok->Type = TT_InlineASMBrace; |
| nextToken(); |
| addUnwrappedLine(); |
| break; |
| } |
| FormatTok->Finalized = true; |
| nextToken(); |
| } |
| } |
| break; |
| case tok::kw_namespace: |
| parseNamespace(); |
| return; |
| case tok::kw_inline: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::kw_namespace)) { |
| parseNamespace(); |
| return; |
| } |
| break; |
| case tok::kw_public: |
| case tok::kw_protected: |
| case tok::kw_private: |
| if (Style.Language == FormatStyle::LK_Java || |
| Style.Language == FormatStyle::LK_JavaScript) |
| nextToken(); |
| else |
| parseAccessSpecifier(); |
| return; |
| case tok::kw_if: |
| parseIfThenElse(); |
| return; |
| case tok::kw_for: |
| case tok::kw_while: |
| parseForOrWhileLoop(); |
| return; |
| case tok::kw_do: |
| parseDoWhile(); |
| return; |
| case tok::kw_switch: |
| if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) |
| // 'switch: string' field declaration. |
| break; |
| parseSwitch(); |
| return; |
| case tok::kw_default: |
| if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) |
| // 'default: string' field declaration. |
| break; |
| nextToken(); |
| parseLabel(); |
| return; |
| case tok::kw_case: |
| if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) |
| // 'case: string' field declaration. |
| break; |
| parseCaseLabel(); |
| return; |
| case tok::kw_try: |
| case tok::kw___try: |
| parseTryCatch(); |
| return; |
| case tok::kw_extern: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::string_literal)) { |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| if (Style.BraceWrapping.AfterExternBlock) { |
| addUnwrappedLine(); |
| parseBlock(/*MustBeDeclaration=*/true); |
| } else { |
| parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); |
| } |
| addUnwrappedLine(); |
| return; |
| } |
| } |
| break; |
| case tok::kw_export: |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| parseJavaScriptEs6ImportExport(); |
| return; |
| } |
| break; |
| case tok::identifier: |
| if (FormatTok->is(TT_ForEachMacro)) { |
| parseForOrWhileLoop(); |
| return; |
| } |
| if (FormatTok->is(TT_MacroBlockBegin)) { |
| parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, |
| /*MunchSemi=*/false); |
| return; |
| } |
| if (FormatTok->is(Keywords.kw_import)) { |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| parseJavaScriptEs6ImportExport(); |
| return; |
| } |
| if (Style.Language == FormatStyle::LK_Proto) { |
| nextToken(); |
| if (FormatTok->is(tok::kw_public)) |
| nextToken(); |
| if (!FormatTok->is(tok::string_literal)) |
| return; |
| nextToken(); |
| if (FormatTok->is(tok::semi)) |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| } |
| } |
| if (Style.isCpp() && |
| FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, |
| Keywords.kw_slots, Keywords.kw_qslots)) { |
| nextToken(); |
| if (FormatTok->is(tok::colon)) { |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| } |
| } |
| // In all other cases, parse the declaration. |
| break; |
| default: |
| break; |
| } |
| do { |
| const FormatToken *Previous = FormatTok->Previous; |
| switch (FormatTok->Tok.getKind()) { |
| case tok::at: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| nextToken(); |
| parseBracedList(); |
| } |
| break; |
| case tok::kw_enum: |
| // Ignore if this is part of "template <enum ...". |
| if (Previous && Previous->is(tok::less)) { |
| nextToken(); |
| break; |
| } |
| |
| // parseEnum falls through and does not yet add an unwrapped line as an |
| // enum definition can start a structural element. |
| if (!parseEnum()) |
| break; |
| // This only applies for C++. |
| if (!Style.isCpp()) { |
| addUnwrappedLine(); |
| return; |
| } |
| break; |
| case tok::kw_typedef: |
| nextToken(); |
| if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, |
| Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) |
| parseEnum(); |
| break; |
| case tok::kw_struct: |
| case tok::kw_union: |
| case tok::kw_class: |
| // parseRecord falls through and does not yet add an unwrapped line as a |
| // record declaration or definition can start a structural element. |
| parseRecord(); |
| // This does not apply for Java and JavaScript. |
| if (Style.Language == FormatStyle::LK_Java || |
| Style.Language == FormatStyle::LK_JavaScript) { |
| if (FormatTok->is(tok::semi)) |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| } |
| break; |
| case tok::period: |
| nextToken(); |
| // In Java, classes have an implicit static member "class". |
| if (Style.Language == FormatStyle::LK_Java && FormatTok && |
| FormatTok->is(tok::kw_class)) |
| nextToken(); |
| if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && |
| FormatTok->Tok.getIdentifierInfo()) |
| // JavaScript only has pseudo keywords, all keywords are allowed to |
| // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 |
| nextToken(); |
| break; |
| case tok::semi: |
| nextToken(); |
| addUnwrappedLine(); |
| return; |
| case tok::r_brace: |
| addUnwrappedLine(); |
| return; |
| case tok::l_paren: |
| parseParens(); |
| break; |
| case tok::kw_operator: |
| nextToken(); |
| if (FormatTok->isBinaryOperator()) |
| nextToken(); |
| break; |
| case tok::caret: |
| nextToken(); |
| if (FormatTok->Tok.isAnyIdentifier() || |
| FormatTok->isSimpleTypeSpecifier()) |
| nextToken(); |
| if (FormatTok->is(tok::l_paren)) |
| parseParens(); |
| if (FormatTok->is(tok::l_brace)) |
| parseChildBlock(); |
| break; |
| case tok::l_brace: |
| if (!tryToParseBracedList()) { |
| // A block outside of parentheses must be the last part of a |
| // structural element. |
| // FIXME: Figure out cases where this is not true, and add projections |
| // for them (the one we know is missing are lambdas). |
| if (Style.BraceWrapping.AfterFunction) |
| addUnwrappedLine(); |
| FormatTok->Type = TT_FunctionLBrace; |
| parseBlock(/*MustBeDeclaration=*/false); |
| addUnwrappedLine(); |
| return; |
| } |
| // Otherwise this was a braced init list, and the structural |
| // element continues. |
| break; |
| case tok::kw_try: |
| // We arrive here when parsing function-try blocks. |
| parseTryCatch(); |
| return; |
| case tok::identifier: { |
| if (FormatTok->is(TT_MacroBlockEnd)) { |
| addUnwrappedLine(); |
| return; |
| } |
| |
| // Function declarations (as opposed to function expressions) are parsed |
| // on their own unwrapped line by continuing this loop. Function |
| // expressions (functions that are not on their own line) must not create |
| // a new unwrapped line, so they are special cased below. |
| size_t TokenCount = Line->Tokens.size(); |
| if (Style.Language == FormatStyle::LK_JavaScript && |
| FormatTok->is(Keywords.kw_function) && |
| (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( |
| Keywords.kw_async)))) { |
| tryToParseJSFunction(); |
| break; |
| } |
| if ((Style.Language == FormatStyle::LK_JavaScript || |
| Style.Language == FormatStyle::LK_Java) && |
| FormatTok->is(Keywords.kw_interface)) { |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| // In JavaScript/TypeScript, "interface" can be used as a standalone |
| // identifier, e.g. in `var interface = 1;`. If "interface" is |
| // followed by another identifier, it is very like to be an actual |
| // interface declaration. |
| unsigned StoredPosition = Tokens->getPosition(); |
| FormatToken *Next = Tokens->getNextToken(); |
| FormatTok = Tokens->setPosition(StoredPosition); |
| if (Next && !mustBeJSIdent(Keywords, Next)) { |
| nextToken(); |
| break; |
| } |
| } |
| parseRecord(); |
| addUnwrappedLine(); |
| return; |
| } |
| |
| // See if the following token should start a new unwrapped line. |
| StringRef Text = FormatTok->TokenText; |
| nextToken(); |
| if (Line->Tokens.size() == 1 && |
| // JS doesn't have macros, and within classes colons indicate fields, |
| // not labels. |
| Style.Language != FormatStyle::LK_JavaScript) { |
| if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { |
| Line->Tokens.begin()->Tok->MustBreakBefore = true; |
| parseLabel(); |
| return; |
| } |
| // Recognize function-like macro usages without trailing semicolon as |
| // well as free-standing macros like Q_OBJECT. |
| bool FunctionLike = FormatTok->is(tok::l_paren); |
| if (FunctionLike) |
| parseParens(); |
| |
| bool FollowedByNewline = |
| CommentsBeforeNextToken.empty() |
| ? FormatTok->NewlinesBefore > 0 |
| : CommentsBeforeNextToken.front()->NewlinesBefore > 0; |
| |
| if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && |
| tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { |
| addUnwrappedLine(); |
| return; |
| } |
| } |
| break; |
| } |
| case tok::equal: |
| // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType |
| // TT_JsFatArrow. The always start an expression or a child block if |
| // followed by a curly. |
| if (FormatTok->is(TT_JsFatArrow)) { |
| nextToken(); |
| if (FormatTok->is(tok::l_brace)) |
| parseChildBlock(); |
| break; |
| } |
| |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| nextToken(); |
| parseBracedList(); |
| } else if (Style.Language == FormatStyle::LK_Proto && |
| FormatTok->Tok.is(tok::less)) { |
| nextToken(); |
| parseBracedList(/*ContinueOnSemicolons=*/false, |
| /*ClosingBraceKind=*/tok::greater); |
| } |
| break; |
| case tok::l_square: |
| parseSquare(); |
| break; |
| case tok::kw_new: |
| parseNew(); |
| break; |
| default: |
| nextToken(); |
| break; |
| } |
| } while (!eof()); |
| } |
| |
| bool UnwrappedLineParser::tryToParseLambda() { |
| if (!Style.isCpp()) { |
| nextToken(); |
| return false; |
| } |
| assert(FormatTok->is(tok::l_square)); |
| FormatToken &LSquare = *FormatTok; |
| if (!tryToParseLambdaIntroducer()) |
| return false; |
| |
| while (FormatTok->isNot(tok::l_brace)) { |
| if (FormatTok->isSimpleTypeSpecifier()) { |
| nextToken(); |
| continue; |
| } |
| switch (FormatTok->Tok.getKind()) { |
| case tok::l_brace: |
| break; |
| case tok::l_paren: |
| parseParens(); |
| break; |
| case tok::amp: |
| case tok::star: |
| case tok::kw_const: |
| case tok::comma: |
| case tok::less: |
| case tok::greater: |
| case tok::identifier: |
| case tok::numeric_constant: |
| case tok::coloncolon: |
| case tok::kw_mutable: |
| nextToken(); |
| break; |
| case tok::arrow: |
| FormatTok->Type = TT_LambdaArrow; |
| nextToken(); |
| break; |
| default: |
| return true; |
| } |
| } |
| LSquare.Type = TT_LambdaLSquare; |
| parseChildBlock(); |
| return true; |
| } |
| |
| bool UnwrappedLineParser::tryToParseLambdaIntroducer() { |
| const FormatToken *Previous = FormatTok->Previous; |
| if (Previous && |
| (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, |
| tok::kw_delete) || |
| FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || |
| Previous->isSimpleTypeSpecifier())) { |
| nextToken(); |
| return false; |
| } |
| nextToken(); |
| parseSquare(/*LambdaIntroducer=*/true); |
| return true; |
| } |
| |
| void UnwrappedLineParser::tryToParseJSFunction() { |
| assert(FormatTok->is(Keywords.kw_function) || |
| FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); |
| if (FormatTok->is(Keywords.kw_async)) |
| nextToken(); |
| // Consume "function". |
| nextToken(); |
| |
| // Consume * (generator function). Treat it like C++'s overloaded operators. |
| if (FormatTok->is(tok::star)) { |
| FormatTok->Type = TT_OverloadedOperator; |
| nextToken(); |
| } |
| |
| // Consume function name. |
| if (FormatTok->is(tok::identifier)) |
| nextToken(); |
| |
| if (FormatTok->isNot(tok::l_paren)) |
| return; |
| |
| // Parse formal parameter list. |
| parseParens(); |
| |
| if (FormatTok->is(tok::colon)) { |
| // Parse a type definition. |
| nextToken(); |
| |
| // Eat the type declaration. For braced inline object types, balance braces, |
| // otherwise just parse until finding an l_brace for the function body. |
| if (FormatTok->is(tok::l_brace)) |
| tryToParseBracedList(); |
| else |
| while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) |
| nextToken(); |
| } |
| |
| if (FormatTok->is(tok::semi)) |
| return; |
| |
| parseChildBlock(); |
| } |
| |
| bool UnwrappedLineParser::tryToParseBracedList() { |
| if (FormatTok->BlockKind == BK_Unknown) |
| calculateBraceTypes(); |
| assert(FormatTok->BlockKind != BK_Unknown); |
| if (FormatTok->BlockKind == BK_Block) |
| return false; |
| nextToken(); |
| parseBracedList(); |
| return true; |
| } |
| |
| bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, |
| tok::TokenKind ClosingBraceKind) { |
| bool HasError = false; |
| |
| // FIXME: Once we have an expression parser in the UnwrappedLineParser, |
| // replace this by using parseAssigmentExpression() inside. |
| do { |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| if (FormatTok->is(Keywords.kw_function) || |
| FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { |
| tryToParseJSFunction(); |
| continue; |
| } |
| if (FormatTok->is(TT_JsFatArrow)) { |
| nextToken(); |
| // Fat arrows can be followed by simple expressions or by child blocks |
| // in curly braces. |
| if (FormatTok->is(tok::l_brace)) { |
| parseChildBlock(); |
| continue; |
| } |
| } |
| if (FormatTok->is(tok::l_brace)) { |
| // Could be a method inside of a braced list `{a() { return 1; }}`. |
| if (tryToParseBracedList()) |
| continue; |
| parseChildBlock(); |
| } |
| } |
| if (FormatTok->Tok.getKind() == ClosingBraceKind) { |
| nextToken(); |
| return !HasError; |
| } |
| switch (FormatTok->Tok.getKind()) { |
| case tok::caret: |
| nextToken(); |
| if (FormatTok->is(tok::l_brace)) { |
| parseChildBlock(); |
| } |
| break; |
| case tok::l_square: |
| tryToParseLambda(); |
| break; |
| case tok::l_paren: |
| parseParens(); |
| // JavaScript can just have free standing methods and getters/setters in |
| // object literals. Detect them by a "{" following ")". |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| if (FormatTok->is(tok::l_brace)) |
| parseChildBlock(); |
| break; |
| } |
| break; |
| case tok::l_brace: |
| // Assume there are no blocks inside a braced init list apart |
| // from the ones we explicitly parse out (like lambdas). |
| FormatTok->BlockKind = BK_BracedInit; |
| nextToken(); |
| parseBracedList(); |
| break; |
| case tok::less: |
| if (Style.Language == FormatStyle::LK_Proto) { |
| nextToken(); |
| parseBracedList(/*ContinueOnSemicolons=*/false, |
| /*ClosingBraceKind=*/tok::greater); |
| } else { |
| nextToken(); |
| } |
| break; |
| case tok::semi: |
| // JavaScript (or more precisely TypeScript) can have semicolons in braced |
| // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be |
| // used for error recovery if we have otherwise determined that this is |
| // a braced list. |
| if (Style.Language == FormatStyle::LK_JavaScript) { |
| nextToken(); |
| break; |
| } |
| HasError = true; |
| if (!ContinueOnSemicolons) |
| return !HasError; |
| nextToken(); |
| break; |
| case tok::comma: |
| nextToken(); |
| break; |
| default: |
| nextToken(); |
| break; |
| } |
| } while (!eof()); |
| return false; |
| } |
| |
| void UnwrappedLineParser::parseParens() { |
| assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); |
| nextToken(); |
| do { |
| switch (FormatTok->Tok.getKind()) { |
| case tok::l_paren: |
| parseParens(); |
| if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) |
| parseChildBlock(); |
| break; |
| case tok::r_paren: |
| nextToken(); |
| return; |
| case tok::r_brace: |
| // A "}" inside parenthesis is an error if there wasn't a matching "{". |
| return; |
| case tok::l_square: |
| tryToParseLambda(); |
| break; |
| case tok::l_brace: |
| if (!tryToParseBracedList()) |
| parseChildBlock(); |
| break; |
| case tok::at: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| nextToken(); |
| parseBracedList(); |
| } |
| break; |
| case tok::kw_class: |
| if (Style.Language == FormatStyle::LK_JavaScript) |
| parseRecord(/*ParseAsExpr=*/true); |
| else |
| nextToken(); |
| break; |
| case tok::identifier: |
| if (Style.Language == FormatStyle::LK_JavaScript && |
| (FormatTok->is(Keywords.kw_function) || |
| FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) |
| tryToParseJSFunction(); |
| else |
| nextToken(); |
| break; |
| default: |
| nextToken(); |
| break; |
| } |
| } while (!eof()); |
| } |
| |
| void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { |
| if (!LambdaIntroducer) { |
| assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); |
| if (tryToParseLambda()) |
| return; |
| } |
| do { |
| switch (FormatTok->Tok.getKind()) { |
| case tok::l_paren: |
| parseParens(); |
| break; |
| case tok::r_square: |
| nextToken(); |
| return; |
| case tok::r_brace: |
| // A "}" inside parenthesis is an error if there wasn't a matching "{". |
| return; |
| case tok::l_square: |
| parseSquare(); |
| break; |
| case tok::l_brace: { |
| if (!tryToParseBracedList()) |
| parseChildBlock(); |
| break; |
| } |
| case tok::at: |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| nextToken(); |
| parseBracedList(); |
| } |
| break; |
| default: |
| nextToken(); |
| break; |
| } |
| } while (!eof()); |
| } |
| |
| void UnwrappedLineParser::parseIfThenElse() { |
| assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); |
| nextToken(); |
| if (FormatTok->Tok.is(tok::kw_constexpr)) |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_paren)) |
| parseParens(); |
| bool NeedsUnwrappedLine = false; |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| if (Style.BraceWrapping.BeforeElse) |
| addUnwrappedLine(); |
| else |
| NeedsUnwrappedLine = true; |
| } else { |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| --Line->Level; |
| } |
| if (FormatTok->Tok.is(tok::kw_else)) { |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| addUnwrappedLine(); |
| } else if (FormatTok->Tok.is(tok::kw_if)) { |
| parseIfThenElse(); |
| } else { |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| if (FormatTok->is(tok::eof)) |
| addUnwrappedLine(); |
| --Line->Level; |
| } |
| } else if (NeedsUnwrappedLine) { |
| addUnwrappedLine(); |
| } |
| } |
| |
| void UnwrappedLineParser::parseTryCatch() { |
| assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); |
| nextToken(); |
| bool NeedsUnwrappedLine = false; |
| if (FormatTok->is(tok::colon)) { |
| // We are in a function try block, what comes is an initializer list. |
| nextToken(); |
| while (FormatTok->is(tok::identifier)) { |
| nextToken(); |
| if (FormatTok->is(tok::l_paren)) |
| parseParens(); |
| if (FormatTok->is(tok::comma)) |
| nextToken(); |
| } |
| } |
| // Parse try with resource. |
| if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { |
| parseParens(); |
| } |
| if (FormatTok->is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| if (Style.BraceWrapping.BeforeCatch) { |
| addUnwrappedLine(); |
| } else { |
| NeedsUnwrappedLine = true; |
| } |
| } else if (!FormatTok->is(tok::kw_catch)) { |
| // The C++ standard requires a compound-statement after a try. |
| // If there's none, we try to assume there's a structuralElement |
| // and try to continue. |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| --Line->Level; |
| } |
| while (1) { |
| if (FormatTok->is(tok::at)) |
| nextToken(); |
| if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, |
| tok::kw___finally) || |
| ((Style.Language == FormatStyle::LK_Java || |
| Style.Language == FormatStyle::LK_JavaScript) && |
| FormatTok->is(Keywords.kw_finally)) || |
| (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || |
| FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) |
| break; |
| nextToken(); |
| while (FormatTok->isNot(tok::l_brace)) { |
| if (FormatTok->is(tok::l_paren)) { |
| parseParens(); |
| continue; |
| } |
| if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) |
| return; |
| nextToken(); |
| } |
| NeedsUnwrappedLine = false; |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| if (Style.BraceWrapping.BeforeCatch) |
| addUnwrappedLine(); |
| else |
| NeedsUnwrappedLine = true; |
| } |
| if (NeedsUnwrappedLine) |
| addUnwrappedLine(); |
| } |
| |
| void UnwrappedLineParser::parseNamespace() { |
| assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); |
| |
| const FormatToken &InitialToken = *FormatTok; |
| nextToken(); |
| while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| if (ShouldBreakBeforeBrace(Style, InitialToken)) |
| addUnwrappedLine(); |
| |
| bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || |
| (Style.NamespaceIndentation == FormatStyle::NI_Inner && |
| DeclarationScopeStack.size() > 1); |
| parseBlock(/*MustBeDeclaration=*/true, AddLevel); |
| // Munch the semicolon after a namespace. This is more common than one would |
| // think. Puttin the semicolon into its own line is very ugly. |
| if (FormatTok->Tok.is(tok::semi)) |
| nextToken(); |
| addUnwrappedLine(); |
| } |
| // FIXME: Add error handling. |
| } |
| |
| void UnwrappedLineParser::parseNew() { |
| assert(FormatTok->is(tok::kw_new) && "'new' expected"); |
| nextToken(); |
| if (Style.Language != FormatStyle::LK_Java) |
| return; |
| |
| // In Java, we can parse everything up to the parens, which aren't optional. |
| do { |
| // There should not be a ;, { or } before the new's open paren. |
| if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) |
| return; |
| |
| // Consume the parens. |
| if (FormatTok->is(tok::l_paren)) { |
| parseParens(); |
| |
| // If there is a class body of an anonymous class, consume that as child. |
| if (FormatTok->is(tok::l_brace)) |
| parseChildBlock(); |
| return; |
| } |
| nextToken(); |
| } while (!eof()); |
| } |
| |
| void UnwrappedLineParser::parseForOrWhileLoop() { |
| assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && |
| "'for', 'while' or foreach macro expected"); |
| nextToken(); |
| // JS' for await ( ... |
| if (Style.Language == FormatStyle::LK_JavaScript && |
| FormatTok->is(Keywords.kw_await)) |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_paren)) |
| parseParens(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| addUnwrappedLine(); |
| } else { |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| --Line->Level; |
| } |
| } |
| |
| void UnwrappedLineParser::parseDoWhile() { |
| assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| if (Style.BraceWrapping.IndentBraces) |
| addUnwrappedLine(); |
| } else { |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| --Line->Level; |
| } |
| |
| // FIXME: Add error handling. |
| if (!FormatTok->Tok.is(tok::kw_while)) { |
| addUnwrappedLine(); |
| return; |
| } |
| |
| nextToken(); |
| parseStructuralElement(); |
| } |
| |
| void UnwrappedLineParser::parseLabel() { |
| nextToken(); |
| unsigned OldLineLevel = Line->Level; |
| if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) |
| --Line->Level; |
| if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| if (FormatTok->Tok.is(tok::kw_break)) { |
| if (Style.BraceWrapping.AfterControlStatement) |
| addUnwrappedLine(); |
| parseStructuralElement(); |
| } |
| addUnwrappedLine(); |
| } else { |
| if (FormatTok->is(tok::semi)) |
| nextToken(); |
| addUnwrappedLine(); |
| } |
| Line->Level = OldLineLevel; |
| if (FormatTok->isNot(tok::l_brace)) { |
| parseStructuralElement(); |
| addUnwrappedLine(); |
| } |
| } |
| |
| void UnwrappedLineParser::parseCaseLabel() { |
| assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); |
| // FIXME: fix handling of complex expressions here. |
| do { |
| nextToken(); |
| } while (!eof() && !FormatTok->Tok.is(tok::colon)); |
| parseLabel(); |
| } |
| |
| void UnwrappedLineParser::parseSwitch() { |
| assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); |
| nextToken(); |
| if (FormatTok->Tok.is(tok::l_paren)) |
| parseParens(); |
| if (FormatTok->Tok.is(tok::l_brace)) { |
| CompoundStatementIndenter Indenter(this, Style, Line->Level); |
| parseBlock(/*MustBeDeclaration=*/false); |
| addUnwrappedLine(); |
| } else { |
| addUnwrappedLine(); |
| ++Line->Level; |
| parseStructuralElement(); |
| --Line->Level; |
| } |
| } |
| |
| void UnwrappedLineParser::parseAccessSpecifier() { |
| nextToken(); |
| // Understand Qt's slots. |
| if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) |
| nextToken(); |
| // Otherwise, we don't know what it is, and we'd better keep the next token. |
| if (FormatTok->Tok.is(tok::colon)) |
| nextToken(); |
| addUnwrappedLine(); |
| } |
| |
| bool UnwrappedLineParser::parseEnum() { |
| // Won't be 'enum' for NS_ENUMs. |
| if (FormatTok->Tok.is(tok::kw_enum)) |
| nextToken(); |
| |
| // In TypeScript, "enum" can also be used as property name, e.g. in interface |
| // declarations. An "enum" keyword followed by a colon would be a syntax |
| // error and thus assume it is just an identifier. |
| if (Style.Language == FormatStyle::LK_JavaScript && |
| FormatTok->isOneOf(tok::colon, tok::question)) |
| return false; |
| |
| // Eat up enum class ... |
| if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) |
| nextToken(); |
| |
| while (FormatTok->Tok.getIdentifierInfo() || |
| FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, |
| tok::greater, tok::comma, tok::question)) { |
| nextToken(); |
| // We can have macros or attributes in between 'enum' and the enum name. |
| if (FormatTok->is(tok::l_paren)) |
| parseParens(); |
| if (FormatTok->is(tok::identifier)) { |
| nextToken(); |
| // If there are two identifiers in a row, this is likely an elaborate |
| // return type. In Java, this can be "implements", etc. |
| if (Style.isCpp() && FormatTok->is(tok::identifier)) |
| return false; |
| } |
| } |
| |
| // Just a declaration or something is wrong. |
| if (FormatTok->isNot(tok::l_brace)) |
| return true; |
| FormatTok->BlockKind = BK_Block; |
| |
| if (Style.Language == FormatStyle::LK_Java) { |
| // Java enums are different. |
| parseJavaEnumBody(); |
| return true; |
| } |
| if (Style.Language == FormatStyle::LK_Proto) { |
| parseBlock(/*MustBeDeclaration=*/true); |
| return true; |
| } |
| |
| // Parse enum body. |
| nextToken(); |
| bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); |
| if (HasError) { |
| if (FormatTok->is(tok::semi)) |
| nextToken(); |
| addUnwrappedLine(); |
| } |
| return true; |
| |
| // There is no addUnwrappedLine() here so that we fall through to parsing a |
| // structural element afterwards. Thus, in "enum A {} n, m;", |
| // "} n, m;" will end up in one unwrapped line. |
| } |
| |
| void UnwrappedLineParser::parseJavaEnumBody() { |
| // Determine whether the enum is simple, i.e. does not have a semicolon or |
| // constants with class bodies. Simple enums can be formatted like braced |
| // lists, contracted to a single line, etc. |
| unsigned StoredPosition = Tokens->getPosition(); |
| bool IsSimple = true; |
| FormatToken *Tok = Tokens->getNextToken(); |
| while (Tok) { |
| if (Tok->is(tok::r_brace)) |
| break; |
| if (Tok->isOneOf(tok::l_brace, tok::semi)) { |
| IsSimple = false; |
| break; |
| } |
| // FIXME: This will also mark enums with braces in the arguments to enum |
| // constants as "not simple". This is probably fine in practice, though. |
| Tok = Tokens->getNextToken(); |
| } |
| FormatTok = Tokens->setPosition(StoredPosition); |
| |
| if (IsSimple) { |
| nextToken(); |
| parseBracedList(); |
| addUnwrappedLine(); |
| return; |
| } |
| |
| // Parse the body of a more complex enum. |
| // First add a line for everything up to the "{". |
| nextToken(); |
| addUnwrappedLine(); |
| ++Line->Level; |
| |
| // Parse the enum constants. |
| while (FormatTok) { |
| if (FormatTok->is(tok::l_brace)) { |
| // Parse the constant's class body. |
| parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, |
| /*MunchSemi=*/false); |
| } else if (FormatTok->is(tok::l_paren)) { |
| parseParens(); |
| } else if (FormatTok->is(tok::comma)) { |
| nextToken(); |
| addUnwrappedLine(); |
| } else if (FormatTok->is(tok::semi)) { |
| nextToken(); |
| addUnwrappedLine(); |
| break; |
| } else if (FormatTok->is(tok::r_brace)) { |
| addUnwrappedLine(); |
| break; |
| } else { |
| nextToken(); |
| } |
| } |
| |
| // Parse the class body after the enum's ";" if any. |
| parseLevel(/*HasOpeningBrace=*/true); |
| nextToken(); |
| --Line->Level; |
| addUnwrappedLine(); |
| } |
| |
| // Parses a record starting at the token under FormatTok on entry: consumes |
| // that token, the record's name (with interleaved macros/attributes), an |
| // optional base or template clause and, when a '{' follows, the body. |
| // |
| // \p ParseAsExpr selects how the body is parsed: parseChildBlock() when true, |
| // parseBlock() (optionally preceded by a line break) otherwise. |
| void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { |
|   const FormatToken &InitialToken = *FormatTok; |
|   nextToken(); |
| |
|   const bool IsJavaScript = Style.Language == FormatStyle::LK_JavaScript; |
|   const bool AllowsNameSeparators = |
|       Style.Language == FormatStyle::LK_Java || IsJavaScript; |
| |
|   // The actual identifier can be a nested name specifier, and in macros it is |
|   // often token-pasted. Java and JavaScript also accept '.' and ',' here. |
|   const auto IsPartOfName = [&]() -> bool { |
|     return FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, |
|                               tok::kw___attribute, tok::kw___declspec, |
|                               tok::kw_alignas) || |
|            (AllowsNameSeparators && |
|             FormatTok->isOneOf(tok::period, tok::comma)); |
|   }; |
| |
|   while (IsPartOfName()) { |
|     if (IsJavaScript && |
|         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { |
|       // JavaScript/TypeScript supports inline object types in |
|       // extends/implements positions: |
|       //   class Foo implements {bar: number} { } |
|       nextToken(); |
|       if (FormatTok->is(tok::l_brace)) { |
|         tryToParseBracedList(); |
|         continue; |
|       } |
|     } |
|     // Anything that is not an identifier, and any identifier spelled entirely |
|     // in upper case, may be a macro or attribute taking arguments. |
|     const bool MayTakeArguments = |
|         !FormatTok->is(tok::identifier) || |
|         FormatTok->TokenText == FormatTok->TokenText.upper(); |
|     nextToken(); |
|     // We can have macros or attributes in between 'class' and the class name. |
|     if (MayTakeArguments && FormatTok->Tok.is(tok::l_paren)) |
|       parseParens(); |
|   } |
| |
|   // Skip a base clause (':') or template arguments ('<') up to the '{' that |
|   // opens the body; braced lists met on the way are parsed and skipped. A ';' |
|   // means there is no body to parse here. |
|   // |
|   // Note that parsing away template declarations here leads to incorrectly |
|   // accepting function declarations as record declarations. In general, we |
|   // cannot solve this problem. Consider: |
|   //   class A<int> B() {} |
|   // which can be a function definition or a class definition when B() is a |
|   // macro. If we find enough real-world cases where this is a problem, we can |
|   // parse for the 'template' keyword in the beginning of the statement, and |
|   // thus rule out the record production in case there is no template (this |
|   // would still leave us with an ambiguity between template function and |
|   // class declarations). |
|   if (FormatTok->isOneOf(tok::colon, tok::less)) { |
|     for (; !eof(); nextToken()) { |
|       if (FormatTok->is(tok::l_brace)) { |
|         calculateBraceTypes(/*ExpectClassBody=*/true); |
|         if (!tryToParseBracedList()) |
|           break; |
|       } |
|       if (FormatTok->Tok.is(tok::semi)) |
|         return; |
|     } |
|   } |
| |
|   // There is no addUnwrappedLine() after the body so that we fall through to |
|   // parsing a structural element afterwards. Thus, in "class A {} n, m;", |
|   // "} n, m;" will end up in one unwrapped line. |
|   if (FormatTok->Tok.isNot(tok::l_brace)) |
|     return; |
|   if (ParseAsExpr) { |
|     parseChildBlock(); |
|     return; |
|   } |
|   if (ShouldBreakBeforeBrace(Style, InitialToken)) |
|     addUnwrappedLine(); |
|   parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, |
|              /*MunchSemi=*/false); |
| } |
| |
| // Consumes an Objective-C protocol list. FormatTok must be on the opening |
| // '<'; everything up to and including the next '>' (or up to the end of the |
| // input) is consumed. |
| void UnwrappedLineParser::parseObjCProtocolList() { |
|   assert(FormatTok->Tok.is(tok::less) && "'<' expected."); |
|   nextToken(); // Skip '<'. |
|   while (!eof() && FormatTok->Tok.isNot(tok::greater)) |
|     nextToken(); |
|   nextToken(); // Skip '>'. |
| } |
| |
| // Parses structural elements until an Objective-C "@end" keyword token has |
| // been consumed or the end of the input is reached. |
| void UnwrappedLineParser::parseObjCUntilAtEnd() { |
|   do { |
|     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { |
|       // Consume the keyword, finish its line and stop. |
|       nextToken(); |
|       addUnwrappedLine(); |
|       return; |
|     } |
|     if (FormatTok->is(tok::l_brace)) { |
|       parseBlock(/*MustBeDeclaration=*/false); |
|       // In ObjC interfaces, nothing should be following the "}". |
|       addUnwrappedLine(); |
|       continue; |
|     } |
|     if (FormatTok->is(tok::r_brace)) { |
|       // Ignore stray "}". parseStructuralElement doesn't consume them. |
|       nextToken(); |
|       addUnwrappedLine(); |
|       continue; |
|     } |
|     parseStructuralElement(); |
|   } while (!eof()); |
| } |
| |
| // Parses an Objective-C @interface/@implementation: the header (name, then |
| // either a base class or a category, then an optional protocol list), an |
| // optional braced block, and everything that follows up to "@end". |
| void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { |
|   nextToken(); // token under FormatTok on entry |
|   nextToken(); // interface name |
| |
|   // @interface can be followed by either a base class, or a category. |
|   if (FormatTok->is(tok::colon)) { |
|     nextToken(); // ':' |
|     nextToken(); // base class name |
|   } else if (FormatTok->is(tok::l_paren)) { |
|     // Skip category, if present. |
|     parseParens(); |
|   } |
| |
|   if (FormatTok->is(tok::less)) { |
|     parseObjCProtocolList(); |
|   } |
| |
|   if (FormatTok->is(tok::l_brace)) { |
|     // Optionally put the '{' on a line of its own. |
|     if (Style.BraceWrapping.AfterObjCDeclaration) { |
|       addUnwrappedLine(); |
|     } |
|     parseBlock(/*MustBeDeclaration=*/true); |
|   } |
| |
|   // With instance variables, this puts '}' on its own line. Without instance |
|   // variables, this ends the @interface line. |
|   addUnwrappedLine(); |
| |
|   parseObjCUntilAtEnd(); |
| } |
| |
| // Parses an Objective-C @protocol. A header that is directly followed by ';' |
| // is a protocol declaration and ends there; otherwise everything up to "@end" |
| // is parsed as well. |
| void UnwrappedLineParser::parseObjCProtocol() { |
|   nextToken(); // token under FormatTok on entry |
|   nextToken(); // protocol name |
| |
|   if (FormatTok->is(tok::less)) { |
|     parseObjCProtocolList(); |
|   } |
| |
|   // Check for protocol declaration. |
|   const bool IsDeclarationOnly = FormatTok->is(tok::semi); |
|   if (IsDeclarationOnly) { |
|     nextToken(); |
|   } |
| |
|   addUnwrappedLine(); |
|   if (!IsDeclarationOnly) { |
|     parseObjCUntilAtEnd(); |
|   } |
| } |
| |
| // Parses the start of a JavaScript ES6 import/export statement. FormatTok |
| // must be on 'import' or 'export'. |
| // |
| // For imports, `export *`, `export {...}` and exports starting with a string |
| // literal, the rest of the statement is consumed up to (not including) the |
| // terminating ';'. For every other export only the leading keywords are |
| // consumed and the caller continues with the declaration or expression. |
| void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { |
|   const bool IsImport = FormatTok->is(Keywords.kw_import); |
|   assert(IsImport || FormatTok->is(tok::kw_export)); |
|   nextToken(); |
| |
|   // Consume the "default" in "export default class/function". |
|   if (FormatTok->is(tok::kw_default)) { |
|     nextToken(); |
|   } |
| |
|   // Consume "async function", "function" and "default function", so that |
|   // these get parsed as free-standing JS functions, i.e. do not require a |
|   // trailing semicolon. |
|   if (FormatTok->is(Keywords.kw_async)) { |
|     nextToken(); |
|   } |
|   if (FormatTok->is(Keywords.kw_function)) { |
|     nextToken(); |
|     return; |
|   } |
| |
|   // Only imports, `export *`, `export {...}` and exports of a string literal |
|   // have the rest of their line consumed here. |
|   const bool ConsumesRestOfLine = |
|       IsImport || FormatTok->isOneOf(tok::l_brace, tok::star) || |
|       FormatTok->isStringLiteral(); |
|   if (!ConsumesRestOfLine) { |
|     return; |
|   } |
| |
|   // Stop at the terminating ';'. An empty current line is the common sign |
|   // that Automatic Semicolon Insertion wrapped the line, in which case the |
|   // import statement should terminate as well. |
|   while (!eof() && !FormatTok->is(tok::semi) && !Line->Tokens.empty()) { |
|     const bool OpensBracedList = FormatTok->is(tok::l_brace); |
|     if (OpensBracedList) { |
|       FormatTok->BlockKind = BK_Block; |
|     } |
|     nextToken(); |
|     if (OpensBracedList) { |
|       parseBracedList(); |
|     } |
|   } |
| } |
| |
| // Writes a one-line description of \p Line to llvm::dbgs(): \p Prefix, the |
| // line's level and first start column, a " MACRO" marker for preprocessor |
| // directive lines, and for every token its name, type and original column. |
| // The children of every token are then printed recursively, each introduced |
| // by "\nChild: ". A newline ends the output. |
| LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, |
|                                                  StringRef Prefix = "") { |
|   llvm::dbgs() << Prefix << "Line(" << Line.Level |
|                << ", FSC=" << Line.FirstStartColumn << ")" |
|                << (Line.InPPDirective ? " MACRO" : "") << ": "; |
|   // First pass: all tokens of this line. |
|   for (const UnwrappedLineNode &Node : Line.Tokens) { |
|     llvm::dbgs() << Node.Tok->Tok.getName() << "[" |
|                  << "T=" << Node.Tok->Type |
|                  << ", OC=" << Node.Tok->OriginalColumn << "] "; |
|   } |
|   // Second pass: child lines, kept after the token list so that the output |
|   // order is unchanged. |
|   for (const UnwrappedLineNode &Node : Line.Tokens) { |
|     for (const UnwrappedLine &Child : Node.Children) { |
|       printDebugInfo(Child, "\nChild: "); |
|     } |
|   } |
|   llvm::dbgs() << "\n"; |
| } |
| |
| // Finishes the current unwrapped line: moves it into CurrentLines and resets |
| // the per-line state. Does nothing when the current line has no tokens. |
| // |
| // When lines are being added to the top-level list, preprocessor directive |
| // lines collected so far are appended right after the finished line. |
| void UnwrappedLineParser::addUnwrappedLine() { |
|   if (Line->Tokens.empty()) { |
|     return; |
|   } |
| |
|   const bool IsTopLevel = CurrentLines == &Lines; |
|   DEBUG({ |
|     if (CurrentLines == &Lines) |
|       printDebugInfo(*Line); |
|   }); |
| |
|   CurrentLines->push_back(std::move(*Line)); |
| |
|   // Bring the moved-from line back into a known, empty state. |
|   Line->Tokens.clear(); |
|   Line->FirstStartColumn = 0; |
|   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; |
| |
|   auto &PendingDirectives = PreprocessorDirectives; |
|   if (IsTopLevel && !PendingDirectives.empty()) { |
|     CurrentLines->append(std::make_move_iterator(PendingDirectives.begin()), |
|                          std::make_move_iterator(PendingDirectives.end())); |
|     PendingDirectives.clear(); |
|   } |
| |
|   // Disconnect the current token from the last token on the previous line. |
|   FormatTok->Previous = nullptr; |
| } |
| |
| // Returns true if the current token is the end-of-file token. |
| bool UnwrappedLineParser::eof() const { |
|   return FormatTok->is(tok::eof); |
| } |
| |
| // Returns true if \p FormatTok is preceded by at least one newline that |
| // counts as a line break: inside a preprocessor directive any newline counts, |
| // elsewhere only an unescaped one does. |
| bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { |
|   const bool HasNewlineBefore = FormatTok.NewlinesBefore > 0; |
|   const bool NewlineCounts = |
|       Line->InPPDirective || FormatTok.HasUnescapedNewline; |
|   return NewlineCounts && HasNewlineBefore; |
| } |
| |
| // Checks if \p FormatTok is a line comment that continues the line comment |
| // section on \p Line. |
| // |
| // Returns false if \p Line has no tokens, or if the comment text (without a |
| // leading "//" or "/*") matches \p CommentPragmasRegex. Otherwise the answer |
| // comes from continuesLineComment(), called with the last token of \p Line |
| // as the previous comment and the min column token computed below. |
| static bool continuesLineCommentSection(const FormatToken &FormatTok, |
|                                         const UnwrappedLine &Line, |
|                                         llvm::Regex &CommentPragmasRegex) { |
|   if (Line.Tokens.empty()) |
|     return false; |
| |
|   // Match the pragma regex against the text after the comment introducer. |
|   const StringRef Text = FormatTok.TokenText; |
|   const bool HasIntroducer = Text.startswith("//") || Text.startswith("/*"); |
|   const StringRef IndentContent = HasIntroducer ? Text.substr(2) : Text; |
|   if (CommentPragmasRegex.match(IndentContent)) |
|     return false; |
| |
|   // If Line starts with a line comment, then FormatTok continues the comment |
|   // section if its original column is greater or equal to the original start |
|   // column of the line. |
|   // |
|   // Define the min column token of a line as follows: if a line ends in '{' |
|   // or contains a '{' followed by a line comment, then the min column token |
|   // is that '{'. Otherwise, it is the last token of the line that is preceded |
|   // by a newline, or the first token of the line if there is none. |
|   // |
|   // If Line starts with a token other than a line comment, then FormatTok |
|   // continues the comment section if its original column is greater than the |
|   // original start column of the min column token of the line. |
|   // |
|   // For example, the second line comment continues the first in these cases: |
|   // |
|   //   // first line |
|   //   // second line |
|   // |
|   // and: |
|   // |
|   //   // first line |
|   //    // second line |
|   // |
|   // and: |
|   // |
|   //   int i; // first line |
|   //    // second line |
|   // |
|   // and: |
|   // |
|   //   do { // first line |
|   //        // second line |
|   //     int i; |
|   //   } while (true); |
|   // |
|   // and: |
|   // |
|   //   enum { |
|   //     a, // first line |
|   //      // second line |
|   //     b |
|   //   }; |
|   // |
|   // The second line comment doesn't continue the first in these cases: |
|   // |
|   //     // first line |
|   //    // second line |
|   // |
|   // and: |
|   // |
|   //   int i; // first line |
|   //   // second line |
|   // |
|   // and: |
|   // |
|   //   do { // first line |
|   //     // second line |
|   //     int i; |
|   //   } while (true); |
|   // |
|   // and: |
|   // |
|   //   enum { |
|   //     a, // first line |
|   //     // second line |
|   //   }; |
|   const FormatToken *MinColumnToken = Line.Tokens.front().Tok; |
| |
|   // Walk the line, remembering the last token seen. The walk stops early at |
|   // a line comment that directly follows a '{' ("{//"), which leaves LastSeen |
|   // on that '{'. |
|   const FormatToken *LastSeen = nullptr; |
|   for (const UnwrappedLineNode &Node : Line.Tokens) { |
|     const FormatToken *Current = Node.Tok; |
|     const bool FollowsBrace = LastSeen && LastSeen->is(tok::l_brace); |
|     if (FollowsBrace && isLineComment(*Current)) |
|       break; |
|     LastSeen = Current; |
| |
|     // Grab the last newline preceding a token in this unwrapped line. |
|     if (Current->NewlinesBefore > 0) |
|       MinColumnToken = Current; |
|   } |
|   // Covers both the "{//" case above and a line whose last token is '{'. |
|   if (LastSeen && LastSeen->is(tok::l_brace)) |
|     MinColumnToken = LastSeen; |
| |
|   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, |
|                               MinColumnToken); |
| } |
| |
| // Pushes all comments stored in CommentsBeforeNextToken onto the current |
| // line and empties that list. |
| // |
| // If the current line has no tokens on entry, a comment on a new line that |
| // does not continue the running line comment section first finishes the line |
| // built so far, and with \p NewlineBeforeNext set the line is also finished |
| // after the last comment. |
| void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { |
|   const bool JustComments = Line->Tokens.empty(); |
|   for (FormatToken *Comment : CommentsBeforeNextToken) { |
|     // Line comments that belong to the same line comment section are put on |
|     // the same line since later we might want to reflow content between them. |
|     // Additional fine-grained breaking of line comment sections is controlled |
|     // by the class BreakableLineCommentSection in case it is desirable to |
|     // keep several line comment sections in the same unwrapped line. |
|     // |
|     // FIXME: Consider putting separate line comment sections as children to |
|     // the unwrapped line instead. |
|     Comment->ContinuesLineCommentSection = |
|         continuesLineCommentSection(*Comment, *Line, CommentPragmasRegex); |
|     const bool StartsNewLine = JustComments && |
|                                !Comment->ContinuesLineCommentSection && |
|                                isOnNewLine(*Comment); |
|     if (StartsNewLine) { |
|       addUnwrappedLine(); |
|     } |
|     pushToken(Comment); |
|   } |
|   if (JustComments && NewlineBeforeNext) { |
|     addUnwrappedLine(); |
|   } |
|   CommentsBeforeNextToken.clear(); |
| } |
| |
| // Appends the current token (preceded by any pending comments) to the |
| // current line and reads the next token, linking it back to the one just |
| // consumed. Does nothing at the end of the input. |
| // |
| // \p LevelDifference is forwarded to readToken(); it is not used on the |
| // JavaScript path. |
| void UnwrappedLineParser::nextToken(int LevelDifference) { |
|   if (eof()) { |
|     return; |
|   } |
| |
|   flushComments(isOnNewLine(*FormatTok)); |
|   pushToken(FormatTok); |
| |
|   FormatToken *const Consumed = FormatTok; |
|   if (Style.Language == FormatStyle::LK_JavaScript) { |
|     readTokenWithJavaScriptASI(); |
|   } else { |
|     readToken(LevelDifference); |
|   } |
|   FormatTok->Previous = Consumed; |
| } |
| |
| // Distributes \p Comments between the current line and |
| // CommentsBeforeNextToken, and sets ContinuesLineCommentSection on each of |
| // them. \p NextTok is the token following the comments and may be null. |
| void UnwrappedLineParser::distributeComments( |
|     const SmallVectorImpl<FormatToken *> &Comments, |
|     const FormatToken *NextTok) { |
|   // Whether or not a line comment token continues a line is controlled by |
|   // the method continuesLineCommentSection, with the following caveat: |
|   // |
|   // The first comment after the initial one whose original column equals the |
|   // original column of the next token (if there is a next token) starts a |
|   // trail of comments that belong to that token, and is therefore marked as |
|   // the start of a new comment section. |
|   // |
|   // For example in this code: |
|   // |
|   //   int a; // line about a |
|   //     // line 1 about b |
|   //     // line 2 about b |
|   //     int b; |
|   // |
|   // the two lines about b form the trail, so there are two sections, the |
|   // first one consisting of the single comment "// line about a" and the |
|   // second one consisting of the next two comments. |
|   if (Comments.empty()) |
|     return; |
| |
|   // Index of the comment starting the trail; 0 means "no trail", since the |
|   // first element is skipped intentionally. |
|   unsigned TrailStart = 0; |
|   if (NextTok) { |
|     for (unsigned Idx = 1, End = Comments.size(); Idx != End; ++Idx) { |
|       if (Comments[Idx]->OriginalColumn == NextTok->OriginalColumn) { |
|         TrailStart = Idx; |
|         break; |
|       } |
|     } |
|   } |
| |
|   // Comments go onto the current line until the first one that starts a new |
|   // section on a new line (or is the very first token); that comment and all |
|   // following ones are stored for the next token instead. |
|   bool KeepInCurrentLine = true; |
|   for (unsigned Idx = 0, End = Comments.size(); Idx != End; ++Idx) { |
|     FormatToken *Comment = Comments[Idx]; |
|     const bool StartsTrail = TrailStart != 0 && Idx == TrailStart; |
|     Comment->ContinuesLineCommentSection = |
|         !StartsTrail && |
|         continuesLineCommentSection(*Comment, *Line, CommentPragmasRegex); |
| |
|     if (!Comment->ContinuesLineCommentSection && |
|         (isOnNewLine(*Comment) || Comment->IsFirst)) |
|       KeepInCurrentLine = false; |
| |
|     if (KeepInCurrentLine) |
|       pushToken(Comment); |
|     else |
|       CommentsBeforeNextToken.push_back(Comment); |
|   } |
| } |
| |
| // Reads tokens from the token source into FormatTok until a token is found |
| // that is not a comment and is not skipped as unreachable preprocessor code, |
| // or until the end of the input. |
| // |
| // Preprocessor directives and conflict markers met on the way are handled |
| // here. Comments are collected and handed to distributeComments() together |
| // with the token that follows them. \p LevelDifference is added to the level |
| // of the line used for each preprocessor directive. |
| void UnwrappedLineParser::readToken(int LevelDifference) { |
|   SmallVector<FormatToken *, 1> PendingComments; |
|   do { |
|     FormatTok = Tokens->getNextToken(); |
|     assert(FormatTok); |
| |
|     // A '#' that starts a line (or the file) outside of a directive begins a |
|     // preprocessor directive. |
|     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && |
|            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { |
|       distributeComments(PendingComments, FormatTok); |
|       PendingComments.clear(); |
|       // If there is an unfinished unwrapped line, we flush the preprocessor |
|       // directives only after that unwrapped line was finished later. |
|       const bool HasUnfinishedLine = !Line->Tokens.empty(); |
|       ScopedLineState BlockState(*this, HasUnfinishedLine); |
|       assert((LevelDifference >= 0 || |
|               static_cast<unsigned>(-LevelDifference) <= Line->Level) && |
|              "LevelDifference makes Line->Level negative"); |
|       Line->Level += LevelDifference; |
|       // Comments stored before the preprocessor directive need to be output |
|       // before the preprocessor directive, at the same level as the |
|       // preprocessor directive, as we consider them to apply to the |
|       // directive. |
|       flushComments(isOnNewLine(*FormatTok)); |
|       parsePPDirective(); |
|     } |
| |
|     // Conflict markers are handled like conditional compilation; the token |
|     // following a marker must start a new line. |
|     while (true) { |
|       const auto Marker = FormatTok->Type; |
|       if (Marker == TT_ConflictStart) |
|         conditionalCompilationStart(/*Unreachable=*/false); |
|       else if (Marker == TT_ConflictAlternative) |
|         conditionalCompilationAlternative(); |
|       else if (Marker == TT_ConflictEnd) |
|         conditionalCompilationEnd(); |
|       else |
|         break; |
|       FormatTok = Tokens->getNextToken(); |
|       FormatTok->MustBreakBefore = true; |
|     } |
| |
|     // Skip tokens in unreachable preprocessor branches. |
|     const bool InUnreachableCode = !PPStack.empty() && |
|                                    PPStack.back().Kind == PP_Unreachable && |
|                                    !Line->InPPDirective; |
|     if (InUnreachableCode) |
|       continue; |
| |
|     if (FormatTok->Tok.is(tok::comment)) { |
|       PendingComments.push_back(FormatTok); |
|       continue; |
|     } |
| |
|     // Found the next real token: place the collected comments and stop. |
|     distributeComments(PendingComments, FormatTok); |
|     PendingComments.clear(); |
|     return; |
|   } while (!eof()); |
| |
|   // End of input: there is no next token for the remaining comments. |
|   distributeComments(PendingComments, nullptr); |
|   PendingComments.clear(); |
| } |
| |
| // Appends \p Tok to the current line. If a break was requested before the |
| // next token, the appended token is marked MustBreakBefore and the request |
| // is cleared. |
| void UnwrappedLineParser::pushToken(FormatToken *Tok) { |
|   Line->Tokens.push_back(UnwrappedLineNode(Tok)); |
|   if (!MustBreakBeforeNextToken) { |
|     return; |
|   } |
|   Line->Tokens.back().Tok->MustBreakBefore = true; |
|   MustBreakBeforeNextToken = false; |
| } |
| |
| } // end namespace format |
| } // end namespace clang |