| //===--- Format.cpp -----------------------------------------*- C++-*------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| #include "Format.h" |
| #include "support/Logger.h" |
| #include "clang/Basic/FileManager.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Format/Format.h" |
| #include "clang/Lex/Lexer.h" |
| #include "clang/Tooling/Core/Replacement.h" |
| #include "llvm/Support/Unicode.h" |
| |
| namespace clang { |
| namespace clangd { |
| namespace { |
| |
| /// Append closing brackets )]} to \p Code to make it well-formed. |
| /// Clang-format conservatively refuses to format files with unmatched brackets |
| /// as it isn't sure where the errors are and so can't correct. |
| /// When editing, it's reasonable to assume code before the cursor is complete. |
| void closeBrackets(std::string &Code, const format::FormatStyle &Style) { |
| SourceManagerForFile FileSM("mock_file.cpp", Code); |
| auto &SM = FileSM.get(); |
| FileID FID = SM.getMainFileID(); |
| Lexer Lex(FID, SM.getBufferOrFake(FID), SM, |
| format::getFormattingLangOpts(Style)); |
| Token Tok; |
| std::vector<char> Brackets; |
| while (!Lex.LexFromRawLexer(Tok)) { |
| switch(Tok.getKind()) { |
| case tok::l_paren: |
| Brackets.push_back(')'); |
| break; |
| case tok::l_brace: |
| Brackets.push_back('}'); |
| break; |
| case tok::l_square: |
| Brackets.push_back(']'); |
| break; |
| case tok::r_paren: |
| if (!Brackets.empty() && Brackets.back() == ')') |
| Brackets.pop_back(); |
| break; |
| case tok::r_brace: |
| if (!Brackets.empty() && Brackets.back() == '}') |
| Brackets.pop_back(); |
| break; |
| case tok::r_square: |
| if (!Brackets.empty() && Brackets.back() == ']') |
| Brackets.pop_back(); |
| break; |
| default: |
| continue; |
| } |
| } |
| // Attempt to end any open comments first. |
| Code.append("\n// */\n"); |
| Code.append(Brackets.rbegin(), Brackets.rend()); |
| } |
| |
| static StringRef commentMarker(llvm::StringRef Line) { |
| for (StringRef Marker : {"///", "//"}){ |
| auto I = Line.rfind(Marker); |
| if (I != StringRef::npos) |
| return Line.substr(I, Marker.size()); |
| } |
| return ""; |
| } |
| |
| llvm::StringRef firstLine(llvm::StringRef Code) { |
| return Code.take_until([](char C) { return C == '\n'; }); |
| } |
| |
| llvm::StringRef lastLine(llvm::StringRef Code) { |
| llvm::StringRef Rest = Code; |
| while (!Rest.empty() && Rest.back() != '\n') |
| Rest = Rest.drop_back(); |
| return Code.substr(Rest.size()); |
| } |
| |
| // Filename is needed for tooling::Replacement and some overloads of reformat(). |
| // Its value should not affect the outcome. We use the default from reformat(). |
| llvm::StringRef Filename = "<stdin>"; |
| |
| // tooling::Replacement from overlapping StringRefs: From must be part of Code. |
| tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, |
| llvm::StringRef To) { |
| assert(From.begin() >= Code.begin() && From.end() <= Code.end()); |
| // The filename is required but ignored. |
| return tooling::Replacement(Filename, From.data() - Code.data(), |
| From.size(), To); |
| } |
| |
| // High-level representation of incremental formatting changes. |
| // The changes are made in two steps. |
| // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding |
| // comment markers when splitting a line comment with a newline). |
| // 2) a selective clang-format run: |
| // - the "source code" passed to clang format is the code up to the cursor, |
| // a placeholder for the cursor, and some closing brackets |
| // - the formatting is restricted to the cursor and (possibly) other ranges |
| // (e.g. the old line when inserting a newline). |
| // - changes before the cursor are applied, those after are discarded. |
| struct IncrementalChanges { |
| // Changes that should be applied before running clang-format. |
| tooling::Replacements Changes; |
| // Ranges of the original source code that should be clang-formatted. |
| // The CursorProxyText will also be formatted. |
| std::vector<tooling::Range> FormatRanges; |
| // The source code that should stand in for the cursor when clang-formatting. |
| // e.g. after inserting a newline, a line-comment at the cursor is used to |
| // ensure that the newline is preserved. |
| std::string CursorPlaceholder; |
| }; |
| |
| // After a newline: |
| // - we continue any line-comment that was split |
| // - we format the old line in addition to the cursor |
| // - we represent the cursor with a line comment to preserve the newline |
| IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, |
| unsigned Cursor) { |
| IncrementalChanges Result; |
| // Before newline, code looked like: |
| // leading^trailing |
| // After newline, code looks like: |
| // leading |
| // indentation^trailing |
| // Where indentation was added by the editor. |
| StringRef Trailing = firstLine(Code.substr(Cursor)); |
| StringRef Indentation = lastLine(Code.take_front(Cursor)); |
| if (Indentation.data() == Code.data()) { |
| vlog("Typed a newline, but we're still on the first line!"); |
| return Result; |
| } |
| StringRef Leading = |
| lastLine(Code.take_front(Indentation.data() - Code.data() - 1)); |
| StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1)); |
| |
| // Strip leading whitespace on trailing line. |
| StringRef TrailingTrim = Trailing.ltrim(); |
| if (unsigned TrailWS = Trailing.size() - TrailingTrim.size()) |
| cantFail(Result.Changes.add( |
| replacement(Code, StringRef(Trailing.begin(), TrailWS), ""))); |
| |
| // If we split a comment, replace indentation with a comment marker. |
| // If the editor made the new line a comment, also respect that. |
| StringRef CommentMarker = commentMarker(Leading); |
| bool NewLineIsComment = !commentMarker(Indentation).empty(); |
| if (!CommentMarker.empty() && |
| (NewLineIsComment || !commentMarker(NextLine).empty() || |
| (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) { |
| using llvm::sys::unicode::columnWidthUTF8; |
| // We indent the new comment to match the previous one. |
| StringRef PreComment = |
| Leading.take_front(CommentMarker.data() - Leading.data()); |
| std::string IndentAndComment = |
| (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ") |
| .str(); |
| cantFail( |
| Result.Changes.add(replacement(Code, Indentation, IndentAndComment))); |
| } else { |
| // Remove any indentation and let clang-format re-add it. |
| // This prevents the cursor marker dragging e.g. an aligned comment with it. |
| cantFail(Result.Changes.add(replacement(Code, Indentation, ""))); |
| } |
| |
| // If we put a the newline inside a {} pair, put } on its own line... |
| if (CommentMarker.empty() && Leading.endswith("{") && |
| Trailing.startswith("}")) { |
| cantFail( |
| Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}"))); |
| // ...and format it. |
| Result.FormatRanges.push_back( |
| tooling::Range(Trailing.data() - Code.data() + 1, 1)); |
| } |
| |
| // Format the whole leading line. |
| Result.FormatRanges.push_back( |
| tooling::Range(Leading.data() - Code.data(), Leading.size())); |
| |
| // We use a comment to represent the cursor, to preserve the newline. |
| // A trailing identifier improves parsing of e.g. for without braces. |
| // Exception: if the previous line has a trailing comment, we can't use one |
| // as the cursor (they will be aligned). But in this case we don't need to. |
| Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident"; |
| |
| return Result; |
| } |
| |
| IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, |
| llvm::StringRef InsertedText) { |
| IncrementalChanges Result; |
| if (InsertedText == "\n") |
| return getIncrementalChangesAfterNewline(Code, Cursor); |
| |
| Result.CursorPlaceholder = " /**/"; |
| return Result; |
| } |
| |
| // Returns equivalent replacements that preserve the correspondence between |
| // OldCursor and NewCursor. If OldCursor lies in a replaced region, that |
| // replacement will be split. |
| std::vector<tooling::Replacement> |
| split(const tooling::Replacements &Replacements, unsigned OldCursor, |
| unsigned NewCursor) { |
| std::vector<tooling::Replacement> Result; |
| int LengthChange = 0; |
| for (const tooling::Replacement &R : Replacements) { |
| if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor |
| Result.push_back(R); |
| LengthChange += R.getReplacementText().size() - R.getLength(); |
| } else if (R.getOffset() < OldCursor) { // overlaps cursor |
| int ReplacementSplit = NewCursor - LengthChange - R.getOffset(); |
| assert(ReplacementSplit >= 0 && |
| ReplacementSplit <= int(R.getReplacementText().size()) && |
| "NewCursor incompatible with OldCursor!"); |
| Result.push_back(tooling::Replacement( |
| R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(), |
| R.getReplacementText().take_front(ReplacementSplit))); |
| Result.push_back(tooling::Replacement( |
| R.getFilePath(), OldCursor, |
| R.getLength() - (OldCursor - R.getOffset()), |
| R.getReplacementText().drop_front(ReplacementSplit))); |
| } else if (R.getOffset() >= OldCursor) { // after cursor |
| Result.push_back(R); |
| } |
| } |
| return Result; |
| } |
| |
| } // namespace |
| |
| // We're simulating the following sequence of changes: |
| // - apply the pre-formatting edits (see getIncrementalChanges) |
| // - insert a placeholder for the cursor |
| // - format some of the resulting code |
| // - remove the cursor placeholder again |
| // The replacements we return are produced by composing these. |
| // |
| // The text we actually pass to clang-format is slightly different from this, |
| // e.g. we have to close brackets. We ensure these differences are *after* |
| // all the regions we want to format, and discard changes in them. |
| std::vector<tooling::Replacement> |
| formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, |
| llvm::StringRef InsertedText, format::FormatStyle Style) { |
| IncrementalChanges Incremental = |
| getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText); |
| // Never *remove* lines in response to pressing enter! This annoys users. |
| if (InsertedText == "\n") { |
| Style.MaxEmptyLinesToKeep = 1000; |
| Style.KeepEmptyLinesAtTheStartOfBlocks = true; |
| } |
| |
| // Compute the code we want to format: |
| // 1) Start with code after the pre-formatting edits. |
| std::string CodeToFormat = cantFail( |
| tooling::applyAllReplacements(OriginalCode, Incremental.Changes)); |
| unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor); |
| // 2) Truncate code after the last interesting range. |
| unsigned FormatLimit = Cursor; |
| for (tooling::Range &R : Incremental.FormatRanges) |
| FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength()); |
| CodeToFormat.resize(FormatLimit); |
| // 3) Insert a placeholder for the cursor. |
| CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder); |
| // 4) Append brackets after FormatLimit so the code is well-formed. |
| closeBrackets(CodeToFormat, Style); |
| |
| // Determine the ranges to format: |
| std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges; |
| // Ranges after the cursor need to be adjusted for the placeholder. |
| for (auto &R : RangesToFormat) { |
| if (R.getOffset() > Cursor) |
| R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(), |
| R.getLength()); |
| } |
| // We also format the cursor. |
| RangesToFormat.push_back( |
| tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); |
| // Also update FormatLimit for the placeholder, we'll use this later. |
| FormatLimit += Incremental.CursorPlaceholder.size(); |
| |
| // Run clang-format, and truncate changes at FormatLimit. |
| tooling::Replacements FormattingChanges; |
| format::FormattingAttemptStatus Status; |
| for (const tooling::Replacement &R : format::reformat( |
| Style, CodeToFormat, RangesToFormat, Filename, &Status)) { |
| if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit. |
| cantFail(FormattingChanges.add(R)); |
| else if(R.getOffset() < FormatLimit) { // Overlaps limit. |
| if (R.getReplacementText().empty()) // Deletions are easy to handle. |
| cantFail(FormattingChanges.add(tooling::Replacement(Filename, |
| R.getOffset(), FormatLimit - R.getOffset(), ""))); |
| else |
| // Hopefully won't happen in practice? |
| elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}", |
| Cursor, CodeToFormat); |
| } |
| } |
| if (!Status.FormatComplete) |
| vlog("Incremental format incomplete at line {0}", Status.Line); |
| |
| // Now we are ready to compose the changes relative to OriginalCode. |
| // edits -> insert placeholder -> format -> remove placeholder. |
| // We must express insert/remove as Replacements. |
| tooling::Replacements InsertCursorPlaceholder( |
| tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder)); |
| unsigned FormattedCursorStart = |
| FormattingChanges.getShiftedCodePosition(Cursor), |
| FormattedCursorEnd = FormattingChanges.getShiftedCodePosition( |
| Cursor + Incremental.CursorPlaceholder.size()); |
| tooling::Replacements RemoveCursorPlaceholder( |
| tooling::Replacement(Filename, FormattedCursorStart, |
| FormattedCursorEnd - FormattedCursorStart, "")); |
| |
| // We can't simply merge() and return: tooling::Replacements will combine |
| // adjacent edits left and right of the cursor. This gives the right source |
| // code, but loses information about where the cursor is! |
| // Fortunately, none of the individual passes lose information, so: |
| // - we use merge() to compute the final Replacements |
| // - we chain getShiftedCodePosition() to compute final cursor position |
| // - we split the final Replacements at the cursor position, so that |
| // each Replacement lies either before or after the cursor. |
| tooling::Replacements Final; |
| unsigned FinalCursor = OriginalCursor; |
| #ifndef NDEBUG |
| std::string FinalCode = std::string(OriginalCode); |
| dlog("Initial code: {0}", FinalCode); |
| #endif |
| for (auto Pass : |
| std::vector<std::pair<const char *, const tooling::Replacements *>>{ |
| {"Pre-formatting changes", &Incremental.Changes}, |
| {"Insert placeholder", &InsertCursorPlaceholder}, |
| {"clang-format", &FormattingChanges}, |
| {"Remove placeholder", &RemoveCursorPlaceholder}}) { |
| Final = Final.merge(*Pass.second); |
| FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor); |
| #ifndef NDEBUG |
| FinalCode = |
| cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second)); |
| dlog("After {0}:\n{1}^{2}", Pass.first, |
| StringRef(FinalCode).take_front(FinalCursor), |
| StringRef(FinalCode).drop_front(FinalCursor)); |
| #endif |
| } |
| return split(Final, OriginalCursor, FinalCursor); |
| } |
| |
| unsigned |
| transformCursorPosition(unsigned Offset, |
| const std::vector<tooling::Replacement> &Replacements) { |
| unsigned OriginalOffset = Offset; |
| for (const auto &R : Replacements) { |
| if (R.getOffset() + R.getLength() <= OriginalOffset) { |
| // Replacement is before cursor. |
| Offset += R.getReplacementText().size(); |
| Offset -= R.getLength(); |
| } else if (R.getOffset() < OriginalOffset) { |
| // Replacement overlaps cursor. |
| // Preserve position within replacement text, as far as possible. |
| unsigned PositionWithinReplacement = Offset - R.getOffset(); |
| if (PositionWithinReplacement > R.getReplacementText().size()) { |
| Offset += R.getReplacementText().size(); |
| Offset -= PositionWithinReplacement; |
| } |
| } else { |
| // Replacement after cursor. |
| break; // Replacements are sorted, the rest are also after the cursor. |
| } |
| } |
| return Offset; |
| } |
| |
| } // namespace clangd |
| } // namespace clang |