reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
//===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InefficientVectorOperationCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Lex/Lexer.h"
#include "../utils/DeclRefExprUtils.h"
#include "../utils/OptionsUtils.h"

using namespace clang::ast_matchers;

namespace clang {
namespace tidy {
namespace performance {

namespace {

// Matcher names. Given the code:
//
// \code
// void f() {
//   vector<T> v;
//   for (int i = 0; i < 10 + 1; ++i) {
//     v.push_back(i);
//   }
//
//   SomeProto p;
//   for (int i = 0; i < 10 + 1; ++i) {
//     p.add_xxx(i);
//   }
// }
// \endcode
//
// The matcher names are bound to following parts of the AST:
//   - LoopCounterName: The entire for loop (as ForStmt).
//   - LoopParentName: The body of function f (as CompoundStmt).
//   - VectorVarDeclName: 'v' (as VarDecl).
//   - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as
//     DeclStmt).
//   - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr).
//   - LoopInitVarName: 'i' (as VarDecl).
//   - LoopEndExpr: '10+1' (as Expr).
// If EnableProto, the proto related names are bound to the following parts:
//   - ProtoVarDeclName: 'p' (as VarDecl).
//   - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt).
//   - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr).
static const char LoopCounterName[] = "for_loop_counter";
static const char LoopParentName[] = "loop_parent";
static const char VectorVarDeclName[] = "vector_var_decl";
static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt";
static const char PushBackOrEmplaceBackCallName[] = "append_call";
static const char ProtoVarDeclName[] = "proto_var_decl";
static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt";
static const char ProtoAddFieldCallName[] = "proto_add_field";
static const char LoopInitVarName[] = "loop_init_var";
static const char LoopEndExprName[] = "loop_end_expr";
static const char RangeLoopName[] = "for_range_loop";

ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() {
  return hasType(cxxRecordDecl(hasAnyName(
      "::std::vector", "::std::set", "::std::unordered_set", "::std::map",
      "::std::unordered_map", "::std::array", "::std::deque")));
}

} // namespace

InefficientVectorOperationCheck::InefficientVectorOperationCheck(
    StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      VectorLikeClasses(utils::options::parseStringList(
          Options.get("VectorLikeClasses", "::std::vector"))),
      EnableProto(Options.getLocalOrGlobal("EnableProto", false)) {}

void InefficientVectorOperationCheck::storeOptions(
    ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "VectorLikeClasses",
                utils::options::serializeStringList(VectorLikeClasses));
  Options.store(Opts, "EnableProto", EnableProto);
}

void InefficientVectorOperationCheck::AddMatcher(
    const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName,
    StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl,
    StringRef AppendCallName, MatchFinder *Finder) {
  const auto DefaultConstructorCall = cxxConstructExpr(
      hasType(TargetRecordDecl),
      hasDeclaration(cxxConstructorDecl(isDefaultConstructor())));
  const auto TargetVarDecl =
      varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName);
  const auto TargetVarDefStmt =
      declStmt(hasSingleDecl(equalsBoundNode(VarDeclName)))
          .bind(VarDeclStmtName);

  const auto AppendCallExpr =
      cxxMemberCallExpr(
          callee(AppendMethodDecl), on(hasType(TargetRecordDecl)),
          onImplicitObjectArgument(declRefExpr(to(TargetVarDecl))))
          .bind(AppendCallName);
  const auto AppendCall = expr(ignoringImplicit(AppendCallExpr));
  const auto LoopVarInit =
      declStmt(hasSingleDecl(varDecl(hasInitializer(integerLiteral(equals(0))))
                                 .bind(LoopInitVarName)));
  const auto RefersToLoopVar = ignoringParenImpCasts(
      declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName)))));

  // Matchers for the loop whose body has only 1 push_back/emplace_back calling
  // statement.
  const auto HasInterestingLoopBody = hasBody(
      anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall));
  const auto InInterestingCompoundStmt =
      hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName));

  // Match counter-based for loops:
  //  for (int i = 0; i < n; ++i) {
  //    v.push_back(...);
  //    // Or: proto.add_xxx(...);
  //  }
  //
  // FIXME: Support more types of counter-based loops like decrement loops.
  Finder->addMatcher(
      forStmt(
          hasLoopInit(LoopVarInit),
          hasCondition(binaryOperator(
              hasOperatorName("<"), hasLHS(RefersToLoopVar),
              hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar))))
                         .bind(LoopEndExprName)))),
          hasIncrement(unaryOperator(hasOperatorName("++"),
                                     hasUnaryOperand(RefersToLoopVar))),
          HasInterestingLoopBody, InInterestingCompoundStmt)
          .bind(LoopCounterName),
      this);

  // Match for-range loops:
  //   for (const auto& E : data) {
  //     v.push_back(...);
  //     // Or: proto.add_xxx(...);
  //   }
  //
  // FIXME: Support more complex range-expressions.
  Finder->addMatcher(
      cxxForRangeStmt(
          hasRangeInit(declRefExpr(supportedContainerTypesMatcher())),
          HasInterestingLoopBody, InInterestingCompoundStmt)
          .bind(RangeLoopName),
      this);
}

void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
  const auto VectorDecl = cxxRecordDecl(hasAnyName(SmallVector<StringRef, 5>(
      VectorLikeClasses.begin(), VectorLikeClasses.end())));
  const auto AppendMethodDecl =
      cxxMethodDecl(hasAnyName("push_back", "emplace_back"));
  AddMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
             AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder);

  if (EnableProto) {
    const auto ProtoDecl =
        cxxRecordDecl(isDerivedFrom("::proto2::MessageLite"));

    // A method's name starts with "add_" might not mean it's an add field
    // call; it could be the getter for a proto field of which the name starts
    // with "add_". So we exlude const methods.
    const auto AddFieldMethodDecl =
        cxxMethodDecl(matchesName("::add_"), unless(isConst()));
    AddMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName,
               AddFieldMethodDecl, ProtoAddFieldCallName, Finder);
  }
}

void InefficientVectorOperationCheck::check(
    const MatchFinder::MatchResult &Result) {
  auto* Context = Result.Context;
  if (Context->getDiagnostics().hasUncompilableErrorOccurred())
    return;

  const SourceManager &SM = *Result.SourceManager;
  const auto *VectorVarDecl =
      Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName);
  const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName);
  const auto *RangeLoop =
      Result.Nodes.getNodeAs<CXXForRangeStmt>(RangeLoopName);
  const auto *VectorAppendCall =
      Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackOrEmplaceBackCallName);
  const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ProtoVarDeclName);
  const auto *ProtoAddFieldCall =
      Result.Nodes.getNodeAs<CXXMemberCallExpr>(ProtoAddFieldCallName);
  const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName);
  const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName);

  const CXXMemberCallExpr *AppendCall =
      VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall;
  assert(AppendCall && "no append call expression");

  const Stmt *LoopStmt = ForLoop;
  if (!LoopStmt)
    LoopStmt = RangeLoop;

  const auto *TargetVarDecl = VectorVarDecl;
  if (!TargetVarDecl)
    TargetVarDecl = ProtoVarDecl;

  llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs =
      utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent,
                                            *Context);
  for (const auto *Ref : AllVarRefs) {
    // Skip cases where there are usages (defined as DeclRefExpr that refers
    // to "v") of vector variable / proto variable `v` before the for loop. We
    // consider these usages are operations causing memory preallocation (e.g.
    // "v.resize(n)", "v.reserve(n)").
    //
    // FIXME: make it more intelligent to identify the pre-allocating
    // operations before the for loop.
    if (SM.isBeforeInTranslationUnit(Ref->getLocation(),
                                     LoopStmt->getBeginLoc())) {
      return;
    }
  }

  std::string PartialReserveStmt;
  if (VectorAppendCall != nullptr) {
    PartialReserveStmt = ".reserve";
  } else {
    llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName();
    FieldName.consume_front("add_");
    std::string MutableFieldName = ("mutable_" + FieldName).str();
    PartialReserveStmt = "." + MutableFieldName +
                         "()->Reserve"; // e.g., ".mutable_xxx()->Reserve"
  }

  llvm::StringRef VarName = Lexer::getSourceText(
      CharSourceRange::getTokenRange(
          AppendCall->getImplicitObjectArgument()->getSourceRange()),
      SM, Context->getLangOpts());

  std::string ReserveSize;
  // Handle for-range loop cases.
  if (RangeLoop) {
    // Get the range-expression in a for-range statement represented as
    // `for (range-declarator: range-expression)`.
    StringRef RangeInitExpName =
        Lexer::getSourceText(CharSourceRange::getTokenRange(
                                 RangeLoop->getRangeInit()->getSourceRange()),
                             SM, Context->getLangOpts());
    ReserveSize = (RangeInitExpName + ".size()").str();
  } else if (ForLoop) {
    // Handle counter-based loop cases.
    StringRef LoopEndSource = Lexer::getSourceText(
        CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM,
        Context->getLangOpts());
    ReserveSize = LoopEndSource;
  }

  auto Diag = diag(AppendCall->getBeginLoc(),
                   "%0 is called inside a loop; consider pre-allocating the "
                   "container capacity before the loop")
              << AppendCall->getMethodDecl()->getDeclName();
  if (!ReserveSize.empty()) {
    std::string ReserveStmt =
        (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str();
    Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt);
  }
}

} // namespace performance
} // namespace tidy
} // namespace clang