|  | //===--- FormatToken.cpp - Format C++ code --------------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | /// | 
|  | /// \file | 
|  | /// This file implements specific functions of \c FormatTokens and their | 
|  | /// roles. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "FormatToken.h" | 
|  | #include "ContinuationIndenter.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include <climits> | 
|  |  | 
|  | namespace clang { | 
|  | namespace format { | 
|  |  | 
|  | const char *getTokenTypeName(TokenType Type) { | 
|  | static const char *const TokNames[] = { | 
|  | #define TYPE(X) #X, | 
|  | LIST_TOKEN_TYPES | 
|  | #undef TYPE | 
|  | nullptr}; | 
|  |  | 
|  | if (Type < NUM_TOKEN_TYPES) | 
|  | return TokNames[Type]; | 
|  | llvm_unreachable("unknown TokenType"); | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | // FIXME: This is copy&pasted from Sema. Put it in a common place and remove | 
|  | // duplication. | 
|  | bool FormatToken::isSimpleTypeSpecifier() const { | 
|  | switch (Tok.getKind()) { | 
|  | case tok::kw_short: | 
|  | case tok::kw_long: | 
|  | case tok::kw___int64: | 
|  | case tok::kw___int128: | 
|  | case tok::kw_signed: | 
|  | case tok::kw_unsigned: | 
|  | case tok::kw_void: | 
|  | case tok::kw_char: | 
|  | case tok::kw_int: | 
|  | case tok::kw_half: | 
|  | case tok::kw_float: | 
|  | case tok::kw_double: | 
|  | case tok::kw___bf16: | 
|  | case tok::kw__Float16: | 
|  | case tok::kw___float128: | 
|  | case tok::kw___ibm128: | 
|  | case tok::kw_wchar_t: | 
|  | case tok::kw_bool: | 
|  | case tok::kw___underlying_type: | 
|  | case tok::annot_typename: | 
|  | case tok::kw_char8_t: | 
|  | case tok::kw_char16_t: | 
|  | case tok::kw_char32_t: | 
|  | case tok::kw_typeof: | 
|  | case tok::kw_decltype: | 
|  | case tok::kw__Atomic: | 
|  | return true; | 
|  | default: | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool FormatToken::isTypeOrIdentifier() const { | 
|  | return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier); | 
|  | } | 
|  |  | 
|  | TokenRole::~TokenRole() {} | 
|  |  | 
|  | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} | 
|  |  | 
|  | unsigned CommaSeparatedList::formatAfterToken(LineState &State, | 
|  | ContinuationIndenter *Indenter, | 
|  | bool DryRun) { | 
|  | if (State.NextToken == nullptr || !State.NextToken->Previous) | 
|  | return 0; | 
|  |  | 
|  | if (Formats.size() == 1) | 
|  | return 0; // Handled by formatFromToken | 
|  |  | 
|  | // Ensure that we start on the opening brace. | 
|  | const FormatToken *LBrace = | 
|  | State.NextToken->Previous->getPreviousNonComment(); | 
|  | if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || | 
|  | LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || | 
|  | LBrace->Next->is(TT_DesignatedInitializerPeriod)) | 
|  | return 0; | 
|  |  | 
|  | // Calculate the number of code points we have to format this list. As the | 
|  | // first token is already placed, we have to subtract it. | 
|  | unsigned RemainingCodePoints = | 
|  | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; | 
|  |  | 
|  | // Find the best ColumnFormat, i.e. the best number of columns to use. | 
|  | const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); | 
|  |  | 
|  | // If no ColumnFormat can be used, the braced list would generally be | 
|  | // bin-packed. Add a severe penalty to this so that column layouts are | 
|  | // preferred if possible. | 
|  | if (!Format) | 
|  | return 10000; | 
|  |  | 
|  | // Format the entire list. | 
|  | unsigned Penalty = 0; | 
|  | unsigned Column = 0; | 
|  | unsigned Item = 0; | 
|  | while (State.NextToken != LBrace->MatchingParen) { | 
|  | bool NewLine = false; | 
|  | unsigned ExtraSpaces = 0; | 
|  |  | 
|  | // If the previous token was one of our commas, we are now on the next item. | 
|  | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { | 
|  | if (!State.NextToken->isTrailingComment()) { | 
|  | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; | 
|  | ++Column; | 
|  | } | 
|  | ++Item; | 
|  | } | 
|  |  | 
|  | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { | 
|  | Column = 0; | 
|  | NewLine = true; | 
|  | } | 
|  |  | 
|  | // Place token using the continuation indenter and store the penalty. | 
|  | Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); | 
|  | } | 
|  | return Penalty; | 
|  | } | 
|  |  | 
|  | unsigned CommaSeparatedList::formatFromToken(LineState &State, | 
|  | ContinuationIndenter *Indenter, | 
|  | bool DryRun) { | 
|  | // Formatting with 1 Column isn't really a column layout, so we don't need the | 
|  | // special logic here. We can just avoid bin packing any of the parameters. | 
|  | if (Formats.size() == 1 || HasNestedBracedList) | 
|  | State.Stack.back().AvoidBinPacking = true; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // Returns the lengths in code points between Begin and End (both included), | 
|  | // assuming that the entire sequence is put on a single line. | 
|  | static unsigned CodePointsBetween(const FormatToken *Begin, | 
|  | const FormatToken *End) { | 
|  | assert(End->TotalLength >= Begin->TotalLength); | 
|  | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; | 
|  | } | 
|  |  | 
|  | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { | 
|  | // FIXME: At some point we might want to do this for other lists, too. | 
|  | if (!Token->MatchingParen || | 
|  | !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) | 
|  | return; | 
|  |  | 
|  | // In C++11 braced list style, we should not format in columns unless they | 
|  | // have many items (20 or more) or we allow bin-packing of function call | 
|  | // arguments. | 
|  | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && | 
|  | Commas.size() < 19) | 
|  | return; | 
|  |  | 
|  | // Limit column layout for JavaScript array initializers to 20 or more items | 
|  | // for now to introduce it carefully. We can become more aggressive if this | 
|  | // necessary. | 
|  | if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) | 
|  | return; | 
|  |  | 
|  | // Column format doesn't really make sense if we don't align after brackets. | 
|  | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) | 
|  | return; | 
|  |  | 
|  | FormatToken *ItemBegin = Token->Next; | 
|  | while (ItemBegin->isTrailingComment()) | 
|  | ItemBegin = ItemBegin->Next; | 
|  | SmallVector<bool, 8> MustBreakBeforeItem; | 
|  |  | 
|  | // The lengths of an item if it is put at the end of the line. This includes | 
|  | // trailing comments which are otherwise ignored for column alignment. | 
|  | SmallVector<unsigned, 8> EndOfLineItemLength; | 
|  |  | 
|  | bool HasSeparatingComment = false; | 
|  | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { | 
|  | // Skip comments on their own line. | 
|  | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { | 
|  | ItemBegin = ItemBegin->Next; | 
|  | HasSeparatingComment = i > 0; | 
|  | } | 
|  |  | 
|  | MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); | 
|  | if (ItemBegin->is(tok::l_brace)) | 
|  | HasNestedBracedList = true; | 
|  | const FormatToken *ItemEnd = nullptr; | 
|  | if (i == Commas.size()) { | 
|  | ItemEnd = Token->MatchingParen; | 
|  | const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); | 
|  | ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); | 
|  | if (Style.Cpp11BracedListStyle && | 
|  | !ItemEnd->Previous->isTrailingComment()) { | 
|  | // In Cpp11 braced list style, the } and possibly other subsequent | 
|  | // tokens will need to stay on a line with the last element. | 
|  | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) | 
|  | ItemEnd = ItemEnd->Next; | 
|  | } else { | 
|  | // In other braced lists styles, the "}" can be wrapped to the new line. | 
|  | ItemEnd = Token->MatchingParen->Previous; | 
|  | } | 
|  | } else { | 
|  | ItemEnd = Commas[i]; | 
|  | // The comma is counted as part of the item when calculating the length. | 
|  | ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); | 
|  |  | 
|  | // Consume trailing comments so the are included in EndOfLineItemLength. | 
|  | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && | 
|  | ItemEnd->Next->isTrailingComment()) | 
|  | ItemEnd = ItemEnd->Next; | 
|  | } | 
|  | EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); | 
|  | // If there is a trailing comma in the list, the next item will start at the | 
|  | // closing brace. Don't create an extra item for this. | 
|  | if (ItemEnd->getNextNonComment() == Token->MatchingParen) | 
|  | break; | 
|  | ItemBegin = ItemEnd->Next; | 
|  | } | 
|  |  | 
|  | // Don't use column layout for lists with few elements and in presence of | 
|  | // separating comments. | 
|  | if (Commas.size() < 5 || HasSeparatingComment) | 
|  | return; | 
|  |  | 
|  | if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) | 
|  | return; | 
|  |  | 
|  | // We can never place more than ColumnLimit / 3 items in a row (because of the | 
|  | // spaces and the comma). | 
|  | unsigned MaxItems = Style.ColumnLimit / 3; | 
|  | std::vector<unsigned> MinSizeInColumn; | 
|  | MinSizeInColumn.reserve(MaxItems); | 
|  | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { | 
|  | ColumnFormat Format; | 
|  | Format.Columns = Columns; | 
|  | Format.ColumnSizes.resize(Columns); | 
|  | MinSizeInColumn.assign(Columns, UINT_MAX); | 
|  | Format.LineCount = 1; | 
|  | bool HasRowWithSufficientColumns = false; | 
|  | unsigned Column = 0; | 
|  | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { | 
|  | assert(i < MustBreakBeforeItem.size()); | 
|  | if (MustBreakBeforeItem[i] || Column == Columns) { | 
|  | ++Format.LineCount; | 
|  | Column = 0; | 
|  | } | 
|  | if (Column == Columns - 1) | 
|  | HasRowWithSufficientColumns = true; | 
|  | unsigned Length = | 
|  | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; | 
|  | Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); | 
|  | MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); | 
|  | ++Column; | 
|  | } | 
|  | // If all rows are terminated early (e.g. by trailing comments), we don't | 
|  | // need to look further. | 
|  | if (!HasRowWithSufficientColumns) | 
|  | break; | 
|  | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. | 
|  |  | 
|  | for (unsigned i = 0; i < Columns; ++i) | 
|  | Format.TotalWidth += Format.ColumnSizes[i]; | 
|  |  | 
|  | // Don't use this Format, if the difference between the longest and shortest | 
|  | // element in a column exceeds a threshold to avoid excessive spaces. | 
|  | if ([&] { | 
|  | for (unsigned i = 0; i < Columns - 1; ++i) | 
|  | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) | 
|  | return true; | 
|  | return false; | 
|  | }()) | 
|  | continue; | 
|  |  | 
|  | // Ignore layouts that are bound to violate the column limit. | 
|  | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) | 
|  | continue; | 
|  |  | 
|  | Formats.push_back(Format); | 
|  | } | 
|  | } | 
|  |  | 
|  | const CommaSeparatedList::ColumnFormat * | 
|  | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { | 
|  | const ColumnFormat *BestFormat = nullptr; | 
|  | for (const ColumnFormat &Format : llvm::reverse(Formats)) { | 
|  | if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { | 
|  | if (BestFormat && Format.LineCount > BestFormat->LineCount) | 
|  | break; | 
|  | BestFormat = &Format; | 
|  | } | 
|  | } | 
|  | return BestFormat; | 
|  | } | 
|  |  | 
|  | } // namespace format | 
|  | } // namespace clang |