// clang-tools-extra/clang-tidy/utils/LexerUtils.cpp - llvm-project - Git at Google

 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "LexerUtils.h"
 #include "clang/AST/AST.h"
 #include "clang/Basic/SourceManager.h"

 namespace clang {
 namespace tidy {
 namespace utils {
 namespace lexer {

 // Walks backwards from the character just before Location and returns the
 // first token that can be raw-lexed; when SkipComments is true, tok::comment
 // tokens are passed over. A token whose kind is tok::unknown is returned when
 // no such token is found before the start of the file.
 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
                        const LangOptions &LangOpts, bool SkipComments) {
   // Sentinel handed back on every "nothing found" path.
   Token NotFound;
   NotFound.setKind(tok::unknown);

   Location = Location.getLocWithOffset(-1);
   if (Location.isInvalid())
     return NotFound;

   const SourceLocation StartOfFile =
       SM.getLocForStartOfFile(SM.getFileID(Location));
   while (Location != StartOfFile) {
     Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);

     // Lex into a fresh token on every step so that a rejected comment can
     // never leak out as the result once the search gives up.
     Token Candidate;
     if (!Lexer::getRawToken(Location, Candidate, SM, LangOpts) &&
         (!SkipComments || !Candidate.is(tok::comment)))
       return Candidate;

     // The rejected token begins the file: there is nothing in front of it,
     // and stepping back further would move past StartOfFile, which the loop
     // condition could then no longer detect.
     if (Location == StartOfFile)
       break;

     Location = Location.getLocWithOffset(-1);
   }
   return NotFound;
 }

 // Returns the beginning of the token that covers the character immediately
 // before Start. An invalid SourceLocation is returned when Start, or the
 // position one character before it, is invalid or is a macro location.
 SourceLocation findPreviousTokenStart(SourceLocation Start,
                                       const SourceManager &SM,
                                       const LangOptions &LangOpts) {
   // Only valid, non-macro locations can be handed to the lexer below.
   const auto IsUsable = [](SourceLocation Loc) {
     return !Loc.isInvalid() && !Loc.isMacroID();
   };

   if (!IsUsable(Start))
     return SourceLocation();

   const SourceLocation Prior = Start.getLocWithOffset(-1);
   if (!IsUsable(Prior))
     return SourceLocation();

   return Lexer::GetBeginningOfToken(Prior, SM, LangOpts);
 }

 // Scans backwards from Start, one token at a time, and returns the location
 // of the first token whose kind is TK. An invalid SourceLocation is returned
 // if Start is invalid or a macro location, if the backwards walk produces an
 // invalid or macro location, or if a token cannot be raw-lexed.
 SourceLocation findPreviousTokenKind(SourceLocation Start,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts,
                                      tok::TokenKind TK) {
   if (Start.isInvalid() || Start.isMacroID())
     return SourceLocation();

   for (SourceLocation Cursor = Start;;) {
     const SourceLocation TokenBegin =
         findPreviousTokenStart(Cursor, SM, LangOpts);
     if (TokenBegin.isInvalid() || TokenBegin.isMacroID())
       return SourceLocation();

     Token Candidate;
     const bool LexFailed = Lexer::getRawToken(TokenBegin, Candidate, SM,
                                               LangOpts,
                                               /*IgnoreWhiteSpace=*/true);
     if (LexFailed)
       return SourceLocation();

     if (Candidate.is(TK))
       return Candidate.getLocation();

     // Not the kind we want: continue the search in front of this token.
     Cursor = TokenBegin;
   }
 }

 // Forwards to findNextAnyTokenKind, asking for either tok::comma or
 // tok::semi, and returns whatever location that helper yields.
 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
                                   const LangOptions &LangOpts) {
   const SourceLocation Terminator =
       findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
   return Terminator;
 }

 // Returns the first token after Start that is not a tok::comment, or an empty
 // Optional if Lexer::findNextToken fails to produce a token.
 Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
                                               const SourceManager &SM,
                                               const LangOptions &LangOpts) {
   for (;;) {
     Optional<Token> Next = Lexer::findNextToken(Start, SM, LangOpts);
     if (!Next || !Next->is(tok::comment))
       return Next;

     // Resume the search from the comment that was just found. Without this
     // step every iteration would query the same location and receive the
     // same comment again, so the loop could never terminate.
     Start = Next->getLocation();
   }
 }

 // Walks Range and reports true as soon as the current location is a macro
 // location, the token following it cannot be obtained, or that token is a
 // tok::hash. Returns false if the end of the range is reached first.
 //
 // NOTE(review): each step inspects the token *after* the current location, so
 // the token sitting exactly at Range.getBegin() appears never to be examined
 // itself -- confirm this is intended.
 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
   assert(Range.isValid() && "Invalid Range for relexing provided");

   const SourceLocation RangeEnd = Range.getEnd();
   for (SourceLocation Loc = Range.getBegin(); Loc < RangeEnd;
        // Advance to one character past the end of the token at Loc.
        Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts)
                  .getLocWithOffset(1)) {
     if (Loc.isMacroID())
       return true;

     const llvm::Optional<Token> Following =
         Lexer::findNextToken(Loc, SM, LangOpts);
     if (!Following || Following->is(tok::hash))
       return true;
   }

   return false;
 }

 // Raw-lexes the buffer containing Range, starting at Range.getBegin(), and
 // looks for tokens of kind TK located before Range.getEnd(). TK must be one
 // of kw_const, kw_volatile or kw_restrict.
 //
 // Matches seen before the first '<' and matches seen after it are tracked
 // separately; the "after" match is discarded whenever a '>' or '>>' is
 // encountered. The last surviving "after" match is returned if there is one,
 // otherwise the last "before" match (which may itself be empty).
 llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
                                          CharSourceRange Range,
                                          const ASTContext &Context,
                                          const SourceManager &SM) {
   assert((TK == tok::kw_const || TK == tok::kw_volatile ||
           TK == tok::kw_restrict) &&
          "TK is not a qualifier keyword");

   const std::pair<FileID, unsigned> Begin =
       SM.getDecomposedLoc(Range.getBegin());
   StringRef Buffer = SM.getBufferData(Begin.first);
   Lexer RawLexer(SM.getLocForStartOfFile(Begin.first), Context.getLangOpts(),
                  Buffer.begin(), Buffer.data() + Begin.second, Buffer.end());

   llvm::Optional<Token> BeforeTemplate;
   llvm::Optional<Token> AfterTemplate;
   bool InsideOrPastTemplate = false;

   Token Tok;
   while (!RawLexer.LexFromRawLexer(Tok)) {
     // Stop once the token sits at, or beyond, the end of the range.
     if (Range.getEnd() == Tok.getLocation() ||
         SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation()))
       break;

     // The raw lexer hands out raw identifiers; look them up so that keywords
     // carry their real token kind.
     if (Tok.is(tok::raw_identifier)) {
       IdentifierInfo &Info = Context.Idents.get(
           StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
       Tok.setIdentifierInfo(&Info);
       Tok.setKind(Info.getTokenID());
     }

     if (Tok.is(tok::less)) {
       InsideOrPastTemplate = true;
     } else if (Tok.isOneOf(tok::greater, tok::greatergreater)) {
       AfterTemplate = None;
     } else if (Tok.is(TK)) {
       if (InsideOrPastTemplate)
         AfterTemplate = Tok;
       else
         BeforeTemplate = Tok;
     }
   }

   if (AfterTemplate != None)
     return AfterTemplate;
   return BeforeTemplate;
 }

 // True for CompoundStmt, DeclStmt and NullStmt. getUnifiedEndLoc stops
 // descending at such a statement and returns the plain end location.
 static bool breakAndReturnEnd(const Stmt &S) {
   return isa<CompoundStmt>(S) || isa<DeclStmt>(S) || isa<NullStmt>(S);
 }

 // True for the statement kinds listed below. getUnifiedEndLoc stops
 // descending at such a statement and looks for the semicolon that follows.
 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
   const bool IsListedKind =
       isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
           SEHLeaveStmt>(S);
   return IsListedKind;
 }

 // Given the end location of a Stmt that does not include its semicolon, this
 // returns the SourceLocation of that semicolon, or an invalid SourceLocation
 // when the next non-comment token is not a semicolon.
 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
                                                   const SourceManager &SM,
                                                   const LangOptions &LangOpts) {
   const auto IsSemicolon = [](const Optional<Token> &Tok) {
     return Tok && Tok->is(tok::TokenKind::semi);
   };

   if (EndLoc.isMacroID()) {
     // Suppose EndLoc refers to a call to foo that is written inside the
     // arguments of macro F. The semicolon we want is the one spelled inside
     // those arguments:
     //  F     (      foo()               ;   )
     //  ^ EndLoc         ^ SpellingLoc   ^ next token of SpellingLoc
     const Optional<Token> AfterSpelling =
         findNextTokenSkippingComments(SM.getSpellingLoc(EndLoc), SM, LangOpts);

     // Requiring a semicolon here is what keeps the macro cases simple.
     // Ideally the result would be `F` with spelling location `;`; for now
     // the location of the token itself is returned.
     if (IsSemicolon(AfterSpelling))
       return AfterSpelling->getLocation();

     // Otherwise continue with the ordinary lookup:
     //  F     (      foo()              ) ;
     //  ^ EndLoc         ^ SpellingLoc  ) ^ next token of EndLoc
   }

   const Optional<Token> AfterEnd =
       findNextTokenSkippingComments(EndLoc, SM, LangOpts);

   // Checking for the semicolon once more avoids some issues with macros.
   return IsSemicolon(AfterEnd) ? AfterEnd->getLocation() : SourceLocation();
 }

 // Follows the last child of S repeatedly until reaching a statement that has
 // no children or that matches breakAndReturnEnd / breakAndReturnEndPlus1Token.
 // If that statement only matches breakAndReturnEndPlus1Token, the location of
 // the semicolon following S.getEndLoc() is returned (possibly invalid);
 // otherwise S.getEndLoc() itself is returned.
 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
                                 const LangOptions &LangOpts) {
   const Stmt *Cursor = &S;
   for (;;) {
     if (Cursor->children().empty() || breakAndReturnEnd(*Cursor) ||
         breakAndReturnEndPlus1Token(*Cursor))
       break;

     // Step down to the last child of the current statement.
     const Stmt *Last = Cursor;
     for (const Stmt *Child : Cursor->children())
       Last = Child;
     Cursor = Last;
   }

   const bool WantsSemicolon =
       !breakAndReturnEnd(*Cursor) && breakAndReturnEndPlus1Token(*Cursor);
   return WantsSemicolon
              ? getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts)
              : S.getEndLoc();
 }

 } // namespace lexer
 } // namespace utils
 } // namespace tidy
 } // namespace clang
	//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "LexerUtils.h"
	#include "clang/AST/AST.h"
	#include "clang/Basic/SourceManager.h"

	namespace clang {
	namespace tidy {
	namespace utils {
	namespace lexer {

	// Walks backwards from the character just before Location and returns the
	// first token that can be raw-lexed; when SkipComments is true, tok::comment
	// tokens are passed over. A token whose kind is tok::unknown is returned
	// when no such token is found before the start of the file.
	Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
	                       const LangOptions &LangOpts, bool SkipComments) {
	  // Sentinel handed back on every "nothing found" path.
	  Token NotFound;
	  NotFound.setKind(tok::unknown);

	  Location = Location.getLocWithOffset(-1);
	  if (Location.isInvalid())
	    return NotFound;

	  const SourceLocation StartOfFile =
	      SM.getLocForStartOfFile(SM.getFileID(Location));
	  while (Location != StartOfFile) {
	    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);

	    // Lex into a fresh token on every step so that a rejected comment can
	    // never leak out as the result once the search gives up.
	    Token Candidate;
	    if (!Lexer::getRawToken(Location, Candidate, SM, LangOpts) &&
	        (!SkipComments || !Candidate.is(tok::comment)))
	      return Candidate;

	    // The rejected token begins the file: there is nothing in front of it,
	    // and stepping back further would move past StartOfFile, which the
	    // loop condition could then no longer detect.
	    if (Location == StartOfFile)
	      break;

	    Location = Location.getLocWithOffset(-1);
	  }
	  return NotFound;
	}

	// Returns the beginning of the token that covers the character immediately
	// before Start. An invalid SourceLocation is returned when Start, or the
	// position one character before it, is invalid or is a macro location.
	SourceLocation findPreviousTokenStart(SourceLocation Start,
	                                      const SourceManager &SM,
	                                      const LangOptions &LangOpts) {
	  if (Start.isInvalid() || Start.isMacroID())
	    return SourceLocation();

	  const SourceLocation BeforeStart = Start.getLocWithOffset(-1);
	  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
	    return SourceLocation();

	  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
	}

	// Scans backwards from Start, one token at a time, and returns the location
	// of the first token whose kind is TK. An invalid SourceLocation is returned
	// if Start is invalid or a macro location, if the backwards walk produces
	// an invalid or macro location, or if a token cannot be raw-lexed.
	SourceLocation findPreviousTokenKind(SourceLocation Start,
	                                     const SourceManager &SM,
	                                     const LangOptions &LangOpts,
	                                     tok::TokenKind TK) {
	  if (Start.isInvalid() || Start.isMacroID())
	    return SourceLocation();

	  while (true) {
	    const SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
	    if (L.isInvalid() || L.isMacroID())
	      return SourceLocation();

	    Token T;
	    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
	      return SourceLocation();

	    if (T.is(TK))
	      return T.getLocation();

	    // Not the kind we want: continue the search in front of this token.
	    Start = L;
	  }
	}

	// Forwards to findNextAnyTokenKind, asking for either tok::comma or
	// tok::semi, and returns whatever location that helper yields.
	SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
	                                  const LangOptions &LangOpts) {
	  const SourceLocation Terminator =
	      findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
	  return Terminator;
	}

	// Returns the first token after Start that is not a tok::comment, or an
	// empty Optional if Lexer::findNextToken fails to produce a token.
	Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
	                                              const SourceManager &SM,
	                                              const LangOptions &LangOpts) {
	  for (;;) {
	    Optional<Token> Next = Lexer::findNextToken(Start, SM, LangOpts);
	    if (!Next || !Next->is(tok::comment))
	      return Next;

	    // Resume the search from the comment that was just found. Without this
	    // step every iteration would query the same location and receive the
	    // same comment again, so the loop could never terminate.
	    Start = Next->getLocation();
	  }
	}

	// Walks Range and reports true as soon as the current location is a macro
	// location, the token following it cannot be obtained, or that token is a
	// tok::hash. Returns false if the end of the range is reached first.
	//
	// NOTE(review): each step inspects the token *after* the current location,
	// so the token sitting exactly at Range.getBegin() appears never to be
	// examined itself -- confirm this is intended.
	bool rangeContainsExpansionsOrDirectives(SourceRange Range,
	                                         const SourceManager &SM,
	                                         const LangOptions &LangOpts) {
	  assert(Range.isValid() && "Invalid Range for relexing provided");

	  const SourceLocation RangeEnd = Range.getEnd();
	  for (SourceLocation Loc = Range.getBegin(); Loc < RangeEnd;
	       // Advance to one character past the end of the token at Loc.
	       Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts)
	                 .getLocWithOffset(1)) {
	    if (Loc.isMacroID())
	      return true;

	    const llvm::Optional<Token> Following =
	        Lexer::findNextToken(Loc, SM, LangOpts);
	    if (!Following || Following->is(tok::hash))
	      return true;
	  }

	  return false;
	}

	// Raw-lexes the buffer containing Range, starting at Range.getBegin(), and
	// looks for tokens of kind TK located before Range.getEnd(). TK must be one
	// of kw_const, kw_volatile or kw_restrict.
	//
	// Matches seen before the first '<' and matches seen after it are tracked
	// separately; the "after" match is discarded whenever a '>' or '>>' is
	// encountered. The last surviving "after" match is returned if there is
	// one, otherwise the last "before" match (which may itself be empty).
	llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
	                                         CharSourceRange Range,
	                                         const ASTContext &Context,
	                                         const SourceManager &SM) {
	  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
	          TK == tok::kw_restrict) &&
	         "TK is not a qualifier keyword");
	  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
	  StringRef File = SM.getBufferData(LocInfo.first);
	  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
	                 File.begin(), File.data() + LocInfo.second, File.end());
	  llvm::Optional<Token> LastMatchBeforeTemplate;
	  llvm::Optional<Token> LastMatchAfterTemplate;
	  bool SawTemplate = false;
	  Token Tok;
	  // Keep lexing while tokens lie strictly before the end of the range.
	  while (!RawLexer.LexFromRawLexer(Tok) &&
	         Range.getEnd() != Tok.getLocation() &&
	         !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
	    // The raw lexer hands out raw identifiers; look them up so that
	    // keywords carry their real token kind.
	    if (Tok.is(tok::raw_identifier)) {
	      IdentifierInfo &Info = Context.Idents.get(
	          StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
	      Tok.setIdentifierInfo(&Info);
	      Tok.setKind(Info.getTokenID());
	    }
	    if (Tok.is(tok::less))
	      SawTemplate = true;
	    else if (Tok.isOneOf(tok::greater, tok::greatergreater))
	      LastMatchAfterTemplate = None;
	    else if (Tok.is(TK)) {
	      if (SawTemplate)
	        LastMatchAfterTemplate = Tok;
	      else
	        LastMatchBeforeTemplate = Tok;
	    }
	  }
	  return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
	                                        : LastMatchBeforeTemplate;
	}

	// True for CompoundStmt, DeclStmt and NullStmt. getUnifiedEndLoc stops
	// descending at such a statement and returns the plain end location.
	static bool breakAndReturnEnd(const Stmt &S) {
	  return isa<CompoundStmt>(S) || isa<DeclStmt>(S) || isa<NullStmt>(S);
	}

	// True for the statement kinds listed below. getUnifiedEndLoc stops
	// descending at such a statement and looks for the semicolon that follows.
	static bool breakAndReturnEndPlus1Token(const Stmt &S) {
	  const bool IsListedKind =
	      isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
	          SEHLeaveStmt>(S);
	  return IsListedKind;
	}

	// Given the end location of a Stmt that does not include its semicolon, this
	// returns the SourceLocation of that semicolon, or an invalid SourceLocation
	// when the next non-comment token is not a semicolon.
	static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
	                                                  const SourceManager &SM,
	                                                  const LangOptions &LangOpts) {
	  const auto IsSemicolon = [](const Optional<Token> &Tok) {
	    return Tok && Tok->is(tok::TokenKind::semi);
	  };

	  if (EndLoc.isMacroID()) {
	    // Suppose EndLoc refers to a call to foo that is written inside the
	    // arguments of macro F. The semicolon we want is the one spelled inside
	    // those arguments:
	    //  F     (      foo()               ;   )
	    //  ^ EndLoc         ^ SpellingLoc   ^ next token of SpellingLoc
	    const Optional<Token> AfterSpelling =
	        findNextTokenSkippingComments(SM.getSpellingLoc(EndLoc), SM, LangOpts);

	    // Requiring a semicolon here is what keeps the macro cases simple.
	    // Ideally the result would be `F` with spelling location `;`; for now
	    // the location of the token itself is returned.
	    if (IsSemicolon(AfterSpelling))
	      return AfterSpelling->getLocation();

	    // Otherwise continue with the ordinary lookup:
	    //  F     (      foo()              ) ;
	    //  ^ EndLoc         ^ SpellingLoc  ) ^ next token of EndLoc
	  }

	  const Optional<Token> AfterEnd =
	      findNextTokenSkippingComments(EndLoc, SM, LangOpts);

	  // Checking for the semicolon once more avoids some issues with macros.
	  return IsSemicolon(AfterEnd) ? AfterEnd->getLocation() : SourceLocation();
	}

	// Follows the last child of S repeatedly until reaching a statement that has
	// no children or that matches breakAndReturnEnd / breakAndReturnEndPlus1Token.
	// If that statement only matches breakAndReturnEndPlus1Token, the location
	// of the semicolon following S.getEndLoc() is returned (possibly invalid);
	// otherwise S.getEndLoc() itself is returned.
	SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
	                                const LangOptions &LangOpts) {
	  const Stmt *Cursor = &S;
	  for (;;) {
	    if (Cursor->children().empty() || breakAndReturnEnd(*Cursor) ||
	        breakAndReturnEndPlus1Token(*Cursor))
	      break;

	    // Step down to the last child of the current statement.
	    const Stmt *Last = Cursor;
	    for (const Stmt *Child : Cursor->children())
	      Last = Child;
	    Cursor = Last;
	  }

	  const bool WantsSemicolon =
	      !breakAndReturnEnd(*Cursor) && breakAndReturnEndPlus1Token(*Cursor);
	  return WantsSemicolon
	             ? getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts)
	             : S.getEndLoc();
	}

	} // namespace lexer
	} // namespace utils
	} // namespace tidy
	} // namespace clang