reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file adds DWARF discriminators to the IR. Path discriminators are
// used to decide what CFG path was taken inside sub-graphs whose instructions
// share the same line and column number information.
//
// The main user of this is the sample profiler. Instruction samples are
// mapped to line number information. Since a single line may be spread
// out over several basic blocks, discriminators add more precise location
// for the samples.
//
// For example,
//
//   1  #define ASSERT(P)
//   2      if (!(P))
//   3        abort()
//   ...
//   100   while (true) {
//   101     ASSERT (sum < 0);
//   102     ...
//   130   }
//
// when converted to IR, this snippet looks something like:
//
// while.body:                                       ; preds = %entry, %if.end
//   %0 = load i32* %sum, align 4, !dbg !15
//   %cmp = icmp slt i32 %0, 0, !dbg !15
//   br i1 %cmp, label %if.end, label %if.then, !dbg !15
//
// if.then:                                          ; preds = %while.body
//   call void @abort(), !dbg !15
//   br label %if.end, !dbg !15
//
// Notice that all the instructions in blocks 'while.body' and 'if.then'
// have exactly the same debug information. When this program is sampled
// at runtime, the profiler will assume that all these instructions are
// equally frequent. This, in turn, will consider the edge while.body->if.then
// to be frequently taken (which is incorrect).
//
// By adding a discriminator value to the instructions in block 'if.then',
// we can distinguish instructions at line 101 with discriminator 0 from
// the instructions at line 101 with discriminator 1.
//
// For more details about DWARF discriminators, please visit
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "add-discriminators"

// Command line option to disable discriminator generation even in the
// presence of debug information. This is only needed when debugging
// debug info generation issues.
static cl::opt<bool> NoDiscriminators(
    "no-discriminators", cl::init(false),
    cl::desc("Disable generation of discriminator information."));

namespace {

// The legacy pass of AddDiscriminators.
struct AddDiscriminatorsLegacyPass : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
    initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace

char AddDiscriminatorsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
                      "Add DWARF path discriminators", false, false)
INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
                    "Add DWARF path discriminators", false, false)

// Create the legacy AddDiscriminatorsPass.
FunctionPass *llvm::createAddDiscriminatorsPass() {
  return new AddDiscriminatorsLegacyPass();
}

static bool shouldHaveDiscriminator(const Instruction *I) {
  return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
}

/// Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
///       if (i < 10) x = i;
///
///     entry:
///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
///     if.then:
///       %1 = load i32* %i.addr, align 4, !dbg !10
///       store i32 %1, i32* %x, align 4, !dbg !10
///       br label %if.end, !dbg !10
///     if.end:
///       ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instruction in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (x < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instruction at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instruction in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
static bool addDiscriminators(Function &F) {
  // If the function has debug information, but the user has disabled
  // discriminators, do nothing.
  // Simlarly, if the function has no debug info, do nothing.
  if (NoDiscriminators || !F.getSubprogram())
    return false;

  bool Changed = false;

  using Location = std::pair<StringRef, unsigned>;
  using BBSet = DenseSet<const BasicBlock *>;
  using LocationBBMap = DenseMap<Location, BBSet>;
  using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
  using LocationSet = DenseSet<Location>;

  LocationBBMap LBM;
  LocationDiscriminatorMap LDM;

  // Traverse all instructions in the function. If the source line location
  // of the instruction appears in other basic block, assign a new
  // discriminator for this instruction.
  for (BasicBlock &B : F) {
    for (auto &I : B.getInstList()) {
      // Not all intrinsic calls should have a discriminator.
      // We want to avoid a non-deterministic assignment of discriminators at
      // different debug levels. We still allow discriminators on memory
      // intrinsic calls because those can be early expanded by SROA into
      // pairs of loads and stores, and the expanded load/store instructions
      // should have a valid discriminator.
      if (!shouldHaveDiscriminator(&I))
        continue;
      const DILocation *DIL = I.getDebugLoc();
      if (!DIL)
        continue;
      Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
      auto &BBMap = LBM[L];
      auto R = BBMap.insert(&B);
      if (BBMap.size() == 1)
        continue;
      // If we could insert more than one block with the same line+file, a
      // discriminator is needed to distinguish both instructions.
      // Only the lowest 7 bits are used to represent a discriminator to fit
      // it in 1 byte ULEB128 representation.
      unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
      auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
      if (!NewDIL) {
        LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
                          << DIL->getFilename() << ":" << DIL->getLine() << ":"
                          << DIL->getColumn() << ":" << Discriminator << " "
                          << I << "\n");
      } else {
        I.setDebugLoc(NewDIL.getValue());
        LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
                   << DIL->getColumn() << ":" << Discriminator << " " << I
                   << "\n");
      }
      Changed = true;
    }
  }

  // Traverse all instructions and assign new discriminators to call
  // instructions with the same lineno that are in the same basic block.
  // Sample base profile needs to distinguish different function calls within
  // a same source line for correct profile annotation.
  for (BasicBlock &B : F) {
    LocationSet CallLocations;
    for (auto &I : B.getInstList()) {
      // We bypass intrinsic calls for the following two reasons:
      //  1) We want to avoid a non-deterministic assigment of
      //     discriminators.
      //  2) We want to minimize the number of base discriminators used.
      if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))  
        continue;

      DILocation *CurrentDIL = I.getDebugLoc();
      if (!CurrentDIL)
        continue;
      Location L =
          std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
      if (!CallLocations.insert(L).second) {
        unsigned Discriminator = ++LDM[L];
        auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
        if (!NewDIL) {
          LLVM_DEBUG(dbgs()
                     << "Could not encode discriminator: "
                     << CurrentDIL->getFilename() << ":"
                     << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
                     << ":" << Discriminator << " " << I << "\n");
        } else {
          I.setDebugLoc(NewDIL.getValue());
          Changed = true;
        }
      }
    }
  }
  return Changed;
}

bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
  return addDiscriminators(F);
}

PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  if (!addDiscriminators(F))
    return PreservedAnalyses::all();

  // FIXME: should be all()
  return PreservedAnalyses::none();
}