reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Merging Corpora.
//
// The task:
//   Take the existing corpus (possibly empty) and merge new inputs into
//   it so that only inputs with new coverage ('features') are added.
//   The process should tolerate the crashes, OOMs, leaks, etc.
//
// Algorithm:
//   The outer process collects the set of files and writes their names
//   into a temporary "control" file, then repeatedly launches the inner
//   process until all inputs are processed.
//   The outer process does not actually execute the target code.
//
//   The inner process reads the control file and sees a) list of all the inputs
//   and b) the last processed input. Then it starts processing the inputs one
//   by one. Before processing every input it writes one line to control file:
//   STARTED INPUT_ID INPUT_SIZE
//   After processing an input it writes another line:
//   DONE INPUT_ID Feature1 Feature2 Feature3 ...
//   If a crash happens while processing an input the last line in the control
//   file will be "STARTED INPUT_ID" and so the next process will know
//   where to resume.
//
//   Once all inputs are processed by the inner process(es) the outer process
//   reads the control files and does the merge based entirely on the contents
//   of control file.
//   It uses a single-pass greedy algorithm, choosing the smallest inputs first
//   and, among inputs of the same size, those that have more new features.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_FUZZER_MERGE_H
#define LLVM_FUZZER_MERGE_H

#include "FuzzerDefs.h"

#include <istream>
#include <ostream>
#include <set>
#include <vector>

namespace fuzzer {

// Per-input record used by the merge machinery: one instance describes one
// input file.
struct MergeFileInfo {
  // Name of the input file.
  std::string Name;

  // Size of the input; zero until filled in. Presumably the INPUT_SIZE value
  // from the control file's "STARTED" line -- confirm against the parser.
  size_t Size = 0;

  // Feature ids recorded for this input (presumably taken from its "DONE"
  // line in the control file -- confirm against the parser).
  Vector<uint32_t> Features;

  // NOTE(review): looks like the coverage ids recorded for this input; the
  // exact meaning is not visible from this header -- confirm.
  Vector<uint32_t> Cov;
};

// Holds the state of a merge: the list of input files plus bookkeeping, and
// the operations that parse that state and compute the merge result.
// Only declarations are visible in this header; notes marked NOTE(review) are
// assumptions that should be confirmed against the definitions.
struct Merger {
  // One record per input file.
  Vector<MergeFileInfo> Files;
  // NOTE(review): presumably the number of leading entries in Files that
  // belong to the already-existing (first) corpus -- confirm.
  size_t NumFilesInFirstCorpus = 0;
  // NOTE(review): presumably the index in Files at which processing should
  // resume -- confirm.
  size_t FirstNotProcessedFile = 0;
  // NOTE(review): presumably identifies the input that was started but never
  // finished (e.g. because processing it crashed) -- confirm.
  std::string LastFailure;

  // NOTE(review): presumably parses control-file contents from IS into this
  // object and returns false on malformed input; ParseCoverage presumably
  // selects whether coverage data is read as well -- confirm.
  bool Parse(std::istream &IS, bool ParseCoverage);
  // Overload taking the text to parse as a string instead of a stream.
  bool Parse(const std::string &Str, bool ParseCoverage);
  // NOTE(review): by its name and void return, presumably parses like Parse()
  // but terminates the process on failure -- confirm.
  void ParseOrExit(std::istream &IS, bool ParseCoverage);
  // Computes the merge. Takes the initial feature/coverage sets as inputs and
  // writes through the NewFeatures, NewCov and NewFiles pointers.
  // NOTE(review): the meaning of the returned size_t is not visible from this
  // header -- confirm against the definition before relying on it.
  size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
               const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
               Vector<std::string> *NewFiles);
  // NOTE(review): by its name, an estimate of the memory used by this object
  // (units presumably bytes) -- confirm.
  size_t ApproximateMemoryConsumption() const;
  // NOTE(review): by its name, the union of the features of all entries in
  // Files -- confirm.
  Set<uint32_t> AllFeatures() const;
};

// Entry point for the crash-tolerant merge of NewCorpus into OldCorpus.
// Results are written through the NewFiles, NewFeatures and NewCov pointers;
// InitialFeatures and InitialCov are read-only inputs.
// NOTE(review): presumably Args is the command line used to launch the inner
// process(es), CFPath is the path of the control file, and Verbose enables
// extra logging -- none of this is visible from the declaration; confirm
// against the definition.
void CrashResistantMerge(const Vector<std::string> &Args,
                         const Vector<SizedFile> &OldCorpus,
                         const Vector<SizedFile> &NewCorpus,
                         Vector<std::string> *NewFiles,
                         const Set<uint32_t> &InitialFeatures,
                         Set<uint32_t> *NewFeatures,
                         const Set<uint32_t> &InitialCov,
                         Set<uint32_t> *NewCov,
                         const std::string &CFPath,
                         bool Verbose);

}  // namespace fuzzer

#endif  // LLVM_FUZZER_MERGE_H