reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <functional>
#include <set>
#include <string>
#include <unordered_map>

namespace llvm {
namespace cfi_verify {

struct GraphResult;

extern bool IgnoreDWARFFlag;

enum class CFIProtectionStatus {
  // This instruction is protected by CFI.
  PROTECTED,
  // The instruction is not an indirect control flow instruction, and thus
  // shouldn't be protected.
  FAIL_NOT_INDIRECT_CF,
  // There is a path to the instruction that was unexpected.
  FAIL_ORPHANS,
  // There is a path to the instruction from a conditional branch that does not
  // properly check the destination for this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // One of the operands of the indirect CF instruction is modified between the
  // CFI-check and execution.
  FAIL_REGISTER_CLOBBERED,
  // The instruction referenced does not exist. This normally indicates an
  // error in the program, where you try and validate a graph that was created
  // in a different FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};

StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);

// Disassembler and analysis tool for machine code files. Keeps track of non-
// sequential control flows, including indirect control flow instructions.
class FileAnalysis {
public:
  // A metadata struct for an instruction.
  struct Instr {
    uint64_t VMAddress;       // Virtual memory address of this instruction.
    MCInst Instruction;       // Instruction.
    uint64_t InstructionSize; // Size of this instruction.
    bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
                // undefined.
  };

  // Construct a FileAnalysis from a file path.
  static Expected<FileAnalysis> Create(StringRef Filename);

  // Construct and take ownership of the supplied object. Do not use this
  // constructor, prefer to use FileAnalysis::Create instead.
  FileAnalysis(object::OwningBinary<object::Binary> Binary);
  FileAnalysis() = delete;
  FileAnalysis(const FileAnalysis &) = delete;
  FileAnalysis(FileAnalysis &&Other) = default;

  // Returns the instruction at the provided address. Returns nullptr if there
  // is no instruction at the provided address.
  const Instr *getInstruction(uint64_t Address) const;

  // Returns the instruction at the provided adress, dying if the instruction is
  // not found.
  const Instr &getInstructionOrDie(uint64_t Address) const;

  // Returns a pointer to the previous/next instruction in sequence,
  // respectively. Returns nullptr if the next/prev instruction doesn't exist,
  // or if the provided instruction doesn't exist.
  const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
  const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;

  // Returns whether this instruction is used by CFI to trap the program.
  bool isCFITrap(const Instr &InstrMeta) const;

  // Returns whether this instruction is a call to a function that will trap on
  // CFI violations (i.e., it serves as a trap in this instance).
  bool willTrapOnCFIViolation(const Instr &InstrMeta) const;

  // Returns whether this function can fall through to the next instruction.
  // Undefined (and bad) instructions cannot fall through, and instruction that
  // modify the control flow can only fall through if they are conditional
  // branches or calls.
  bool canFallThrough(const Instr &InstrMeta) const;

  // Returns the definitive next instruction. This is different from the next
  // instruction sequentially as it will follow unconditional branches (assuming
  // they can be resolved at compile time, i.e. not indirect). This method
  // returns nullptr if the provided instruction does not transfer control flow
  // to exactly one instruction that is known deterministically at compile time.
  // Also returns nullptr if the deterministic target does not exist in this
  // file.
  const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;

  // Get a list of deterministic control flows that lead to the provided
  // instruction. This list includes all static control flow cross-references as
  // well as the previous instruction if it can fall through.
  std::set<const Instr *>
  getDirectControlFlowXRefs(const Instr &InstrMeta) const;

  // Returns whether this instruction uses a register operand.
  bool usesRegisterOperand(const Instr &InstrMeta) const;

  // Returns the list of indirect instructions.
  const std::set<object::SectionedAddress> &getIndirectInstructions() const;

  const MCRegisterInfo *getRegisterInfo() const;
  const MCInstrInfo *getMCInstrInfo() const;
  const MCInstrAnalysis *getMCInstrAnalysis() const;

  // Returns the inlining information for the provided address.
  Expected<DIInliningInfo>
  symbolizeInlinedCode(object::SectionedAddress Address);

  // Returns whether the provided Graph represents a protected indirect control
  // flow instruction in this file.
  CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;

  // Returns the first place the operand register is clobbered between the CFI-
  // check and the indirect CF instruction execution. We do this by walking
  // backwards from the indirect CF and ensuring there is at most one load
  // involving the operand register (which is the indirect CF itself on x86).
  // If the register is not modified, returns the address of the indirect CF
  // instruction. The result is undefined if the provided graph does not fall
  // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
  // CFIProtectionStatus).
  uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;

  // Prints an instruction to the provided stream using this object's pretty-
  // printers.
  void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;

protected:
  // Construct a blank object with the provided triple and features. Used in
  // testing, where a sub class will dependency inject protected methods to
  // allow analysis of raw binary, without requiring a fully valid ELF file.
  FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);

  // Add an instruction to this object.
  void addInstruction(const Instr &Instruction);

  // Disassemble and parse the provided bytes into this object. Instruction
  // address calculation is done relative to the provided SectionAddress.
  void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
                            object::SectionedAddress Address);

  // Constructs and initialises members required for disassembly.
  Error initialiseDisassemblyMembers();

  // Parses code sections from the internal object file. Saves them into the
  // internal members. Should only be called once by Create().
  Error parseCodeSections();

  // Parses the symbol table to look for the addresses of functions that will
  // trap on CFI violations.
  Error parseSymbolTable();

private:
  // Members that describe the input file.
  object::OwningBinary<object::Binary> Binary;
  const object::ObjectFile *Object = nullptr;
  Triple ObjectTriple;
  std::string ArchName;
  std::string MCPU;
  const Target *ObjectTarget = nullptr;
  SubtargetFeatures Features;

  // Members required for disassembly.
  std::unique_ptr<const MCRegisterInfo> RegisterInfo;
  std::unique_ptr<const MCAsmInfo> AsmInfo;
  std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
  std::unique_ptr<const MCInstrInfo> MII;
  MCObjectFileInfo MOFI;
  std::unique_ptr<MCContext> Context;
  std::unique_ptr<const MCDisassembler> Disassembler;
  std::unique_ptr<const MCInstrAnalysis> MIA;
  std::unique_ptr<MCInstPrinter> Printer;

  // Symbolizer used for debug information parsing.
  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;

  // A mapping between the virtual memory address to the instruction metadata
  // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
  // insertion allocation.
  std::map<uint64_t, Instr> Instructions;

  // Contains a mapping between a specific address, and a list of instructions
  // that use this address as a branch target (including call instructions).
  DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;

  // A list of addresses of indirect control flow instructions.
  std::set<object::SectionedAddress> IndirectInstructions;

  // The addresses of functions that will trap on CFI violations.
  SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses;
};

class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
public:
  static char ID;
  std::string Text;

  UnsupportedDisassembly(StringRef Text);

  void log(raw_ostream &OS) const override;
  std::error_code convertToErrorCode() const override;
};

} // namespace cfi_verify
} // namespace llvm

#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H