reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines a checker that checks for padding that could be
//  removed by re-ordering members.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace clang;
using namespace ento;

namespace {
class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
private:
  mutable std::unique_ptr<BugType> PaddingBug;
  mutable BugReporter *BR;

public:
  int64_t AllowedPad;

  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
                    BugReporter &BRArg) const {
    BR = &BRArg;

    // The calls to checkAST* from AnalysisConsumer don't
    // visit template instantiations or lambda classes. We
    // want to visit those, so we make our own RecursiveASTVisitor.
    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
      const PaddingChecker *Checker;
      bool shouldVisitTemplateInstantiations() const { return true; }
      bool shouldVisitImplicitCode() const { return true; }
      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
      bool VisitRecordDecl(const RecordDecl *RD) {
        Checker->visitRecord(RD);
        return true;
      }
      bool VisitVarDecl(const VarDecl *VD) {
        Checker->visitVariable(VD);
        return true;
      }
      // TODO: Visit array new and mallocs for arrays.
    };

    LocalVisitor visitor(this);
    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
  }

  /// Look for records of overly padded types. If padding *
  /// PadMultiplier exceeds AllowedPad, then generate a report.
  /// PadMultiplier is used to share code with the array padding
  /// checker.
  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
    if (shouldSkipDecl(RD))
      return;

    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return;

    // This is the simplest correct case: a class with no fields and one base
    // class. Other cases are more complicated because of how the base classes
    // & fields might interact, so we don't bother dealing with them.
    // TODO: Support other combinations of base classes and fields.
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
      if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
        return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
                           PadMultiplier);

    auto &ASTContext = RD->getASTContext();
    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));

    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
    if (BaselinePad.isZero())
      return;

    CharUnits OptimalPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    std::tie(OptimalPad, OptimalFieldsOrder) =
        calculateOptimalPad(RD, ASTContext, RL);

    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
    if (DiffPad.getQuantity() <= AllowedPad) {
      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
      // There is not enough excess padding to trigger a warning.
      return;
    }
    reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
  }

  /// Look for arrays of overly padded types. If the padding of the
  /// array type exceeds AllowedPad, then generate a report.
  void visitVariable(const VarDecl *VD) const {
    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
    if (ArrTy == nullptr)
      return;
    uint64_t Elts = 0;
    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
      Elts = CArrTy->getSize().getZExtValue();
    if (Elts == 0)
      return;
    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
    if (RT == nullptr)
      return;

    // TODO: Recurse into the fields to see if they have excess padding.
    visitRecord(RT->getDecl(), Elts);
  }

  bool shouldSkipDecl(const RecordDecl *RD) const {
    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return true;
    auto Location = RD->getLocation();
    // If the construct doesn't have a source file, then it's not something
    // we want to diagnose.
    if (!Location.isValid())
      return true;
    SrcMgr::CharacteristicKind Kind =
        BR->getSourceManager().getFileCharacteristic(Location);
    // Throw out all records that come from system headers.
    if (Kind != SrcMgr::C_User)
      return true;

    // Not going to attempt to optimize unions.
    if (RD->isUnion())
      return true;
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      // Tail padding with base classes ends up being very complicated.
      // We will skip objects with base classes for now, unless they do not
      // have fields.
      // TODO: Handle more base class scenarios.
      if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
        return true;
      if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
        return true;
      // Virtual bases are complicated, skipping those for now.
      if (CXXRD->getNumVBases() != 0)
        return true;
      // Can't layout a template, so skip it. We do still layout the
      // instantiations though.
      if (CXXRD->getTypeForDecl()->isDependentType())
        return true;
      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
        return true;
    }
    // How do you reorder fields if you haven't got any?
    else if (RD->field_empty())
      return true;

    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
      // Bitfield layout is hard.
      if (FD->isBitField())
        return true;

      // Variable length arrays are tricky too.
      QualType Ty = FD->getType();
      if (Ty->isIncompleteArrayType())
        return true;
      return false;
    };

    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
      return true;
    return false;
  }

  static CharUnits calculateBaselinePad(const RecordDecl *RD,
                                        const ASTContext &ASTContext,
                                        const ASTRecordLayout &RL) {
    CharUnits PaddingSum;
    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    for (const FieldDecl *FD : RD->fields()) {
      // This checker only cares about the padded size of the
      // field, and not the data size. If the field is a record
      // with tail padding, then we won't put that number in our
      // total because reordering fields won't fix that problem.
      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
      PaddingSum += (FieldOffset - Offset);
      Offset = FieldOffset + FieldSize;
    }
    PaddingSum += RL.getSize() - Offset;
    return PaddingSum;
  }

  /// Optimal padding overview:
  /// 1.  Find a close approximation to where we can place our first field.
  ///     This will usually be at offset 0.
  /// 2.  Try to find the best field that can legally be placed at the current
  ///     offset.
  ///   a.  "Best" is the largest alignment that is legal, but smallest size.
  ///       This is to account for overly aligned types.
  /// 3.  If no fields can fit, pad by rounding the current offset up to the
  ///     smallest alignment requirement of our fields. Measure and track the
  //      amount of padding added. Go back to 2.
  /// 4.  Increment the current offset by the size of the chosen field.
  /// 5.  Remove the chosen field from the set of future possibilities.
  /// 6.  Go back to 2 if there are still unplaced fields.
  /// 7.  Add tail padding by rounding the current offset up to the structure
  ///     alignment. Track the amount of padding added.

  static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
  calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
                      const ASTRecordLayout &RL) {
    struct FieldInfo {
      CharUnits Align;
      CharUnits Size;
      const FieldDecl *Field;
      bool operator<(const FieldInfo &RHS) const {
        // Order from small alignments to large alignments,
        // then large sizes to small sizes.
        // then large field indices to small field indices
        return std::make_tuple(Align, -Size,
                               Field ? -static_cast<int>(Field->getFieldIndex())
                                     : 0) <
               std::make_tuple(
                   RHS.Align, -RHS.Size,
                   RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
                             : 0);
      }
    };
    SmallVector<FieldInfo, 20> Fields;
    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
      FieldInfo RetVal;
      RetVal.Field = FD;
      auto &Ctx = FD->getASTContext();
      std::tie(RetVal.Size, RetVal.Align) =
          Ctx.getTypeInfoInChars(FD->getType());
      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
      if (auto Max = FD->getMaxAlignment())
        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
      return RetVal;
    };
    std::transform(RD->field_begin(), RD->field_end(),
                   std::back_inserter(Fields), GatherSizesAndAlignments);
    llvm::sort(Fields);
    // This lets us skip over vptrs and non-virtual bases,
    // so that we can just worry about the fields in our object.
    // Note that this does cause us to miss some cases where we
    // could pack more bytes in to a base class's tail padding.
    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    CharUnits NewPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    while (!Fields.empty()) {
      unsigned TrailingZeros =
          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
      // 64 will overflow our unsigned long long. Shifting 63 will turn
      // our long long (and CharUnits internal type) negative. So shift 62.
      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
      FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};

      // In the typical case, this will find the last element
      // of the vector. We won't find a middle element unless
      // we started on a poorly aligned address or have an overly
      // aligned field.
      auto Iter = llvm::upper_bound(Fields, InsertPoint);
      if (Iter != Fields.begin()) {
        // We found a field that we can layout with the current alignment.
        --Iter;
        NewOffset += Iter->Size;
        OptimalFieldsOrder.push_back(Iter->Field);
        Fields.erase(Iter);
      } else {
        // We are poorly aligned, and we need to pad in order to layout another
        // field. Round up to at least the smallest field alignment that we
        // currently have.
        CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
        NewPad += NextOffset - NewOffset;
        NewOffset = NextOffset;
      }
    }
    // Calculate tail padding.
    CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
    NewPad += NewSize - NewOffset;
    return {NewPad, std::move(OptimalFieldsOrder)};
  }

  void reportRecord(
      const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
      const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
    if (!PaddingBug)
      PaddingBug =
          std::make_unique<BugType>(this, "Excessive Padding", "Performance");

    SmallString<100> Buf;
    llvm::raw_svector_ostream Os(Buf);
    Os << "Excessive padding in '";
    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
                                LangOptions())
       << "'";

    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
      // TODO: make this show up better in the console output and in
      // the HTML. Maybe just make it show up in HTML like the path
      // diagnostics show.
      SourceLocation ILoc = TSD->getPointOfInstantiation();
      if (ILoc.isValid())
        Os << " instantiated here: "
           << ILoc.printToString(BR->getSourceManager());
    }

    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
       << OptimalPad.getQuantity() << " is optimal). \n"
       << "Optimal fields order: \n";
    for (const auto *FD : OptimalFieldsOrder)
      Os << FD->getName() << ", \n";
    Os << "consider reordering the fields or adding explicit padding "
          "members.";

    PathDiagnosticLocation CELoc =
        PathDiagnosticLocation::create(RD, BR->getSourceManager());
    auto Report =
        std::make_unique<BasicBugReport>(*PaddingBug, Os.str(), CELoc);
    Report->setDeclWithIssue(RD);
    Report->addRange(RD->getSourceRange());
    BR->emitReport(std::move(Report));
  }
};
} // namespace

void ento::registerPaddingChecker(CheckerManager &Mgr) {
  auto *Checker = Mgr.registerChecker<PaddingChecker>();
  Checker->AllowedPad = Mgr.getAnalyzerOptions()
          .getCheckerIntegerOption(Checker, "AllowedPad");
  if (Checker->AllowedPad < 0)
    Mgr.reportInvalidCheckerOptionValue(
        Checker, "AllowedPad", "a non-negative value");
}

bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) {
  return true;
}