reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
  372
  373
  374
  375
  376
  377
//===--- Format.cpp -----------------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Format.h"
#include "Logger.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Unicode.h"

namespace clang {
namespace clangd {
namespace {

/// Append closing brackets )]} to \p Code to make it well-formed.
/// Clang-format conservatively refuses to format files with unmatched brackets
/// as it isn't sure where the errors are and so can't correct.
/// When editing, it's reasonable to assume code before the cursor is complete.
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
  SourceManagerForFile FileSM("dummy.cpp", Code);
  auto &SM = FileSM.get();
  FileID FID = SM.getMainFileID();
  Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
  Token Tok;
  std::vector<char> Brackets;
  while (!Lex.LexFromRawLexer(Tok)) {
    switch(Tok.getKind()) {
      case tok::l_paren:
        Brackets.push_back(')');
        break;
      case tok::l_brace:
        Brackets.push_back('}');
        break;
      case tok::l_square:
        Brackets.push_back(']');
        break;
      case tok::r_paren:
        if (!Brackets.empty() && Brackets.back() == ')')
          Brackets.pop_back();
        break;
      case tok::r_brace:
        if (!Brackets.empty() && Brackets.back() == '}')
          Brackets.pop_back();
        break;
      case tok::r_square:
        if (!Brackets.empty() && Brackets.back() == ']')
          Brackets.pop_back();
        break;
      default:
        continue;
    }
  }
  // Attempt to end any open comments first.
  Code.append("\n// */\n");
  Code.append(Brackets.rbegin(), Brackets.rend());
}

static StringRef commentMarker(llvm::StringRef Line) {
  for (StringRef Marker : {"///", "//"}){
    auto I = Line.rfind(Marker);
    if (I != StringRef::npos)
      return Line.substr(I, Marker.size());
  }
  return "";
}

llvm::StringRef firstLine(llvm::StringRef Code) {
  return Code.take_until([](char C) { return C == '\n'; });
}

llvm::StringRef lastLine(llvm::StringRef Code) {
  llvm::StringRef Rest = Code;
  while (!Rest.empty() && Rest.back() != '\n')
    Rest = Rest.drop_back();
  return Code.substr(Rest.size());
}

// Filename is needed for tooling::Replacement and some overloads of reformat().
// Its value should not affect the outcome. We use the default from reformat().
llvm::StringRef Filename = "<stdin>";

// tooling::Replacement from overlapping StringRefs: From must be part of Code.
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
                                 llvm::StringRef To) {
  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
  // The filename is required but ignored.
  return tooling::Replacement(Filename, From.data() - Code.data(),
                              From.size(), To);
}

// High-level representation of incremental formatting changes.
// The changes are made in two steps.
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
//    comment markers when splitting a line comment with a newline).
// 2) a selective clang-format run:
//    - the "source code" passed to clang format is the code up to the cursor,
//      a placeholder for the cursor, and some closing brackets
//    - the formatting is restricted to the cursor and (possibly) other ranges
//      (e.g. the old line when inserting a newline).
//    - changes before the cursor are applied, those after are discarded.
struct IncrementalChanges {
  // Changes that should be applied before running clang-format.
  tooling::Replacements Changes;
  // Ranges of the original source code that should be clang-formatted.
  // The CursorProxyText will also be formatted.
  std::vector<tooling::Range> FormatRanges;
  // The source code that should stand in for the cursor when clang-formatting.
  // e.g. after inserting a newline, a line-comment at the cursor is used to
  // ensure that the newline is preserved.
  std::string CursorPlaceholder;
};

// After a newline:
//  - we continue any line-comment that was split
//  - we format the old line in addition to the cursor
//  - we represent the cursor with a line comment to preserve the newline
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
                                                     unsigned Cursor) {
  IncrementalChanges Result;
  // Before newline, code looked like:
  //    leading^trailing
  // After newline, code looks like:
  //    leading
  //    indentation^trailing
  // Where indentation was added by the editor.
  StringRef Trailing = firstLine(Code.substr(Cursor));
  StringRef Indentation = lastLine(Code.take_front(Cursor));
  if (Indentation.data() == Code.data()) {
    vlog("Typed a newline, but we're still on the first line!");
    return Result;
  }
  StringRef Leading =
      lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));

  // Strip leading whitespace on trailing line.
  StringRef TrailingTrim = Trailing.ltrim();
  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
    cantFail(Result.Changes.add(
        replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));

  // If we split a comment, replace indentation with a comment marker.
  // If the editor made the new line a comment, also respect that.
  StringRef CommentMarker = commentMarker(Leading);
  bool NewLineIsComment = !commentMarker(Indentation).empty();
  if (!CommentMarker.empty() &&
      (NewLineIsComment || !commentMarker(NextLine).empty() ||
       (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
    using llvm::sys::unicode::columnWidthUTF8;
    // We indent the new comment to match the previous one.
    StringRef PreComment =
        Leading.take_front(CommentMarker.data() - Leading.data());
    std::string IndentAndComment =
        (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
            .str();
    cantFail(
        Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
  } else {
    // Remove any indentation and let clang-format re-add it.
    // This prevents the cursor marker dragging e.g. an aligned comment with it.
    cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
  }

  // If we put a the newline inside a {} pair, put } on its own line...
  if (CommentMarker.empty() && Leading.endswith("{") &&
      Trailing.startswith("}")) {
    cantFail(
        Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
    // ...and format it.
    Result.FormatRanges.push_back(
        tooling::Range(Trailing.data() - Code.data() + 1, 1));
  }

  // Format the whole leading line.
  Result.FormatRanges.push_back(
      tooling::Range(Leading.data() - Code.data(), Leading.size()));

  // We use a comment to represent the cursor, to preserve the newline.
  // A trailing identifier improves parsing of e.g. for without braces.
  // Exception: if the previous line has a trailing comment, we can't use one
  // as the cursor (they will be aligned). But in this case we don't need to.
  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";

  return Result;
}

IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
                                         llvm::StringRef InsertedText) {
  IncrementalChanges Result;
  if (InsertedText == "\n")
    return getIncrementalChangesAfterNewline(Code, Cursor);

  Result.CursorPlaceholder = " /**/";
  return Result;
}

// Returns equivalent replacements that preserve the correspondence between
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
// replacement will be split.
std::vector<tooling::Replacement>
split(const tooling::Replacements &Replacements, unsigned OldCursor,
      unsigned NewCursor) {
  std::vector<tooling::Replacement> Result;
  int LengthChange = 0;
  for (const tooling::Replacement &R : Replacements) {
    if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
      Result.push_back(R);
      LengthChange += R.getReplacementText().size() - R.getLength();
    } else if (R.getOffset() < OldCursor) { // overlaps cursor
      int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
      assert(ReplacementSplit >= 0 &&
             ReplacementSplit <= int(R.getReplacementText().size()) &&
             "NewCursor incompatible with OldCursor!");
      Result.push_back(tooling::Replacement(
          R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
          R.getReplacementText().take_front(ReplacementSplit)));
      Result.push_back(tooling::Replacement(
          R.getFilePath(), OldCursor,
          R.getLength() - (OldCursor - R.getOffset()),
          R.getReplacementText().drop_front(ReplacementSplit)));
    } else if (R.getOffset() >= OldCursor) { // after cursor
      Result.push_back(R);
    }
  }
  return Result;
}

} // namespace

// We're simulating the following sequence of changes:
//   - apply the pre-formatting edits (see getIncrementalChanges)
//   - insert a placeholder for the cursor
//   - format some of the resulting code
//   - remove the cursor placeholder again
// The replacements we return are produced by composing these.
//
// The text we actually pass to clang-format is slightly different from this,
// e.g. we have to close brackets. We ensure these differences are *after*
// all the regions we want to format, and discard changes in them.
std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
                  llvm::StringRef InsertedText, format::FormatStyle Style) {
  IncrementalChanges Incremental =
      getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
  // Never *remove* lines in response to pressing enter! This annoys users.
  if (InsertedText == "\n") {
    Style.MaxEmptyLinesToKeep = 1000;
    Style.KeepEmptyLinesAtTheStartOfBlocks = true;
  }

  // Compute the code we want to format:
  // 1) Start with code after the pre-formatting edits.
  std::string CodeToFormat = cantFail(
      tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
  // 2) Truncate code after the last interesting range.
  unsigned FormatLimit = Cursor;
  for (tooling::Range &R : Incremental.FormatRanges)
    FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
  CodeToFormat.resize(FormatLimit);
  // 3) Insert a placeholder for the cursor.
  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
  // 4) Append brackets after FormatLimit so the code is well-formed.
  closeBrackets(CodeToFormat, Style);

  // Determine the ranges to format:
  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
  // Ranges after the cursor need to be adjusted for the placeholder.
  for (auto &R : RangesToFormat) {
    if (R.getOffset() > Cursor)
      R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
                         R.getLength());
  }
  // We also format the cursor.
  RangesToFormat.push_back(
      tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
  // Also update FormatLimit for the placeholder, we'll use this later.
  FormatLimit += Incremental.CursorPlaceholder.size();

  // Run clang-format, and truncate changes at FormatLimit.
  tooling::Replacements FormattingChanges;
  format::FormattingAttemptStatus Status;
  for (const tooling::Replacement &R : format::reformat(
           Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
    if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
      cantFail(FormattingChanges.add(R));
    else if(R.getOffset() < FormatLimit) { // Overlaps limit.
      if (R.getReplacementText().empty()) // Deletions are easy to handle.
        cantFail(FormattingChanges.add(tooling::Replacement(Filename,
            R.getOffset(), FormatLimit - R.getOffset(), "")));
      else
        // Hopefully won't happen in practice?
        elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
             Cursor, CodeToFormat);
    }
  }
  if (!Status.FormatComplete)
    vlog("Incremental format incomplete at line {0}", Status.Line);

  // Now we are ready to compose the changes relative to OriginalCode.
  //   edits -> insert placeholder -> format -> remove placeholder.
  // We must express insert/remove as Replacements.
  tooling::Replacements InsertCursorPlaceholder(
      tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
  unsigned FormattedCursorStart =
               FormattingChanges.getShiftedCodePosition(Cursor),
           FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
               Cursor + Incremental.CursorPlaceholder.size());
  tooling::Replacements RemoveCursorPlaceholder(
      tooling::Replacement(Filename, FormattedCursorStart,
                           FormattedCursorEnd - FormattedCursorStart, ""));

  // We can't simply merge() and return: tooling::Replacements will combine
  // adjacent edits left and right of the cursor. This gives the right source
  // code, but loses information about where the cursor is!
  // Fortunately, none of the individual passes lose information, so:
  //  - we use merge() to compute the final Replacements
  //  - we chain getShiftedCodePosition() to compute final cursor position
  //  - we split the final Replacements at the cursor position, so that
  //    each Replacement lies either before or after the cursor.
  tooling::Replacements Final;
  unsigned FinalCursor = OriginalCursor;
#ifndef NDEBUG
  std::string FinalCode = OriginalCode;
  dlog("Initial code: {0}", FinalCode);
#endif
  for (auto Pass :
       std::vector<std::pair<const char *, const tooling::Replacements *>>{
           {"Pre-formatting changes", &Incremental.Changes},
           {"Insert placeholder", &InsertCursorPlaceholder},
           {"clang-format", &FormattingChanges},
           {"Remove placeholder", &RemoveCursorPlaceholder}}) {
    Final = Final.merge(*Pass.second);
    FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
#ifndef NDEBUG
    FinalCode =
        cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
    dlog("After {0}:\n{1}^{2}", Pass.first,
         StringRef(FinalCode).take_front(FinalCursor),
         StringRef(FinalCode).drop_front(FinalCursor));
#endif
  }
  return split(Final, OriginalCursor, FinalCursor);
}

unsigned
transformCursorPosition(unsigned Offset,
                        const std::vector<tooling::Replacement> &Replacements) {
  unsigned OriginalOffset = Offset;
  for (const auto &R : Replacements) {
    if (R.getOffset() + R.getLength() <= OriginalOffset) {
      // Replacement is before cursor.
      Offset += R.getReplacementText().size();
      Offset -= R.getLength();
    } else if (R.getOffset() < OriginalOffset) {
      // Replacement overlaps cursor.
      // Preserve position within replacement text, as far as possible.
      unsigned PositionWithinReplacement = Offset - R.getOffset();
      if (PositionWithinReplacement > R.getReplacementText().size()) {
        Offset += R.getReplacementText().size();
        Offset -= PositionWithinReplacement;
      }
    } else {
      // Replacement after cursor.
      break; // Replacements are sorted, the rest are also after the cursor.
    }
  }
  return Offset;
}

} // namespace clangd
} // namespace clang