reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
//===-- Clustering.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Utilities to compute benchmark result clusters.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H

#include "BenchmarkResult.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
#include <limits>
#include <vector>

namespace llvm {
namespace exegesis {

class InstructionBenchmarkClustering {
public:
  enum ModeE { Dbscan, Naive };

  // Clusters `Points` using DBSCAN with the given parameters. See the cc file
  // for more explanations on the algorithm.
  static Expected<InstructionBenchmarkClustering>
  create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
         size_t DbscanMinPts, double AnalysisClusteringEpsilon,
         Optional<unsigned> NumOpcodes = None);

  class ClusterId {
  public:
    static ClusterId noise() { return ClusterId(kNoise); }
    static ClusterId error() { return ClusterId(kError); }
    static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
      return ClusterId(Id, IsUnstable);
    }
    static ClusterId makeValidUnstable(size_t Id) {
      return makeValid(Id, /*IsUnstable=*/true);
    }

    ClusterId() : Id_(kUndef), IsUnstable_(false) {}

    // Compare id's, ignoring the 'unstability' bit.
    bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
    bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }

    bool isValid() const { return Id_ <= kMaxValid; }
    bool isUnstable() const { return IsUnstable_; }
    bool isNoise() const { return Id_ == kNoise; }
    bool isError() const { return Id_ == kError; }
    bool isUndef() const { return Id_ == kUndef; }

    // Precondition: isValid().
    size_t getId() const {
      assert(isValid());
      return Id_;
    }

  private:
    ClusterId(size_t Id, bool IsUnstable = false)
        : Id_(Id), IsUnstable_(IsUnstable) {}

    static constexpr const size_t kMaxValid =
        (std::numeric_limits<size_t>::max() >> 1) - 4;
    static constexpr const size_t kNoise = kMaxValid + 1;
    static constexpr const size_t kError = kMaxValid + 2;
    static constexpr const size_t kUndef = kMaxValid + 3;

    size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
    size_t IsUnstable_ : 1;
  };
  static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");

  struct Cluster {
    Cluster() = delete;
    explicit Cluster(const ClusterId &Id) : Id(Id) {}

    const ClusterId Id;
    // Indices of benchmarks within the cluster.
    std::vector<int> PointIndices;
  };

  ClusterId getClusterIdForPoint(size_t P) const {
    return ClusterIdForPoint_[P];
  }

  const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }

  const Cluster &getCluster(ClusterId Id) const {
    assert(!Id.isUndef() && "unlabeled cluster");
    if (Id.isNoise()) {
      return NoiseCluster_;
    }
    if (Id.isError()) {
      return ErrorCluster_;
    }
    return Clusters_[Id.getId()];
  }

  const std::vector<Cluster> &getValidClusters() const { return Clusters_; }

  // Returns true if the given point is within a distance Epsilon of each other.
  bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
                   const std::vector<BenchmarkMeasure> &Q,
                   const double EpsilonSquared_) const {
    double DistanceSquared = 0.0;
    for (size_t I = 0, E = P.size(); I < E; ++I) {
      const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
      DistanceSquared += Diff * Diff;
    }
    return DistanceSquared <= EpsilonSquared_;
  }

private:
  InstructionBenchmarkClustering(
      const std::vector<InstructionBenchmark> &Points,
      double AnalysisClusteringEpsilonSquared);

  Error validateAndSetup();

  void clusterizeDbScan(size_t MinPts);
  void clusterizeNaive(unsigned NumOpcodes);

  // Stabilization is only needed if dbscan was used to clusterize.
  void stabilize(unsigned NumOpcodes);

  void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;

  bool areAllNeighbours(ArrayRef<size_t> Pts) const;

  const std::vector<InstructionBenchmark> &Points_;
  const double AnalysisClusteringEpsilonSquared_;

  int NumDimensions_ = 0;
  // ClusterForPoint_[P] is the cluster id for Points[P].
  std::vector<ClusterId> ClusterIdForPoint_;
  std::vector<Cluster> Clusters_;
  Cluster NoiseCluster_;
  Cluster ErrorCluster_;
};

class SchedClassClusterCentroid {
public:
  const std::vector<PerInstructionStats> &getStats() const {
    return Representative;
  }

  std::vector<BenchmarkMeasure> getAsPoint() const;

  void addPoint(ArrayRef<BenchmarkMeasure> Point);

  bool validate(InstructionBenchmark::ModeE Mode) const;

private:
  // Measurement stats for the points in the SchedClassCluster.
  std::vector<PerInstructionStats> Representative;
};

} // namespace exegesis
} // namespace llvm

#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H