reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
//===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SparseSet class derived from the version described in
// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec.  1993.
//
// A sparse set holds a small number of objects identified by integer keys from
// a moderately sized universe. The sparse set uses more memory than other
// containers in order to provide faster operations.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_SPARSESET_H
#define LLVM_ADT_SPARSESET_H

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <utility>

namespace llvm {

/// SparseSetValTraits - Objects in a SparseSet are identified by keys that can
/// be uniquely converted to a small integer less than the set's universe. This
/// class allows the set to hold values that differ from the set's key type as
/// long as an index can still be derived from the value. SparseSet never
/// directly compares ValueT, only their indices, so it can map keys to
/// arbitrary values. SparseSetValTraits computes the index from the value
/// object. To compute the index from a key, SparseSet uses a separate
/// KeyFunctorT template argument.
///
/// A simple type declaration, SparseSet<Type>, handles these cases:
/// - unsigned key, identity index, identity value
/// - unsigned key, identity index, fat value providing getSparseSetIndex()
///
/// The type declaration SparseSet<Type, UnaryFunction> handles:
/// - unsigned key, remapped index, identity value (virtual registers)
/// - pointer key, pointer-derived index, identity value (node+ID)
/// - pointer key, pointer-derived index, fat value with getSparseSetIndex()
///
/// Only other, unexpected cases require specializing SparseSetValTraits.
///
/// For best results, ValueT should not require a destructor.
///
template<typename ValueT>
struct SparseSetValTraits {
  static unsigned getValIndex(const ValueT &Val) {
    return Val.getSparseSetIndex();
  }
};

/// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The
/// generic implementation handles ValueT classes which either provide
/// getSparseSetIndex() or specialize SparseSetValTraits<>.
///
template<typename KeyT, typename ValueT, typename KeyFunctorT>
struct SparseSetValFunctor {
  unsigned operator()(const ValueT &Val) const {
    return SparseSetValTraits<ValueT>::getValIndex(Val);
  }
};

/// SparseSetValFunctor<KeyT, KeyT> - Helper class for the common case of
/// identity key/value sets.
template<typename KeyT, typename KeyFunctorT>
struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
  unsigned operator()(const KeyT &Key) const {
    return KeyFunctorT()(Key);
  }
};

/// SparseSet - Fast set implmentation for objects that can be identified by
/// small unsigned keys.
///
/// SparseSet allocates memory proportional to the size of the key universe, so
/// it is not recommended for building composite data structures.  It is useful
/// for algorithms that require a single set with fast operations.
///
/// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast
/// clear() and iteration as fast as a vector.  The find(), insert(), and
/// erase() operations are all constant time, and typically faster than a hash
/// table.  The iteration order doesn't depend on numerical key values, it only
/// depends on the order of insert() and erase() operations.  When no elements
/// have been erased, the iteration order is the insertion order.
///
/// Compared to BitVector, SparseSet<unsigned> uses 8x-40x more memory, but
/// offers constant-time clear() and size() operations as well as fast
/// iteration independent on the size of the universe.
///
/// SparseSet contains a dense vector holding all the objects and a sparse
/// array holding indexes into the dense vector.  Most of the memory is used by
/// the sparse array which is the size of the key universe.  The SparseT
/// template parameter provides a space/speed tradeoff for sets holding many
/// elements.
///
/// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse
/// array uses 4 x Universe bytes.
///
/// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache
/// lines, but the sparse array is 4x smaller.  N is the number of elements in
/// the set.
///
/// For sets that may grow to thousands of elements, SparseT should be set to
/// uint16_t or uint32_t.
///
/// @tparam ValueT      The type of objects in the set.
/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
/// @tparam SparseT     An unsigned integer type. See above.
///
template<typename ValueT,
         typename KeyFunctorT = identity<unsigned>,
         typename SparseT = uint8_t>
class SparseSet {
  static_assert(std::numeric_limits<SparseT>::is_integer &&
                !std::numeric_limits<SparseT>::is_signed,
                "SparseT must be an unsigned integer type");

  using KeyT = typename KeyFunctorT::argument_type;
  using DenseT = SmallVector<ValueT, 8>;
  using size_type = unsigned;
  DenseT Dense;
  SparseT *Sparse = nullptr;
  unsigned Universe = 0;
  KeyFunctorT KeyIndexOf;
  SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;

public:
  using value_type = ValueT;
  using reference = ValueT &;
  using const_reference = const ValueT &;
  using pointer = ValueT *;
  using const_pointer = const ValueT *;

  SparseSet() = default;
  SparseSet(const SparseSet &) = delete;
  SparseSet &operator=(const SparseSet &) = delete;
  ~SparseSet() { free(Sparse); }

  /// setUniverse - Set the universe size which determines the largest key the
  /// set can hold.  The universe must be sized before any elements can be
  /// added.
  ///
  /// @param U Universe size. All object keys must be less than U.
  ///
  void setUniverse(unsigned U) {
    // It's not hard to resize the universe on a non-empty set, but it doesn't
    // seem like a likely use case, so we can add that code when we need it.
    assert(empty() && "Can only resize universe on an empty map");
    // Hysteresis prevents needless reallocations.
    if (U >= Universe/4 && U <= Universe)
      return;
    free(Sparse);
    // The Sparse array doesn't actually need to be initialized, so malloc
    // would be enough here, but that will cause tools like valgrind to
    // complain about branching on uninitialized data.
    Sparse = static_cast<SparseT*>(safe_calloc(U, sizeof(SparseT)));
    Universe = U;
  }

  // Import trivial vector stuff from DenseT.
  using iterator = typename DenseT::iterator;
  using const_iterator = typename DenseT::const_iterator;

  const_iterator begin() const { return Dense.begin(); }
  const_iterator end() const { return Dense.end(); }
  iterator begin() { return Dense.begin(); }
  iterator end() { return Dense.end(); }

  /// empty - Returns true if the set is empty.
  ///
  /// This is not the same as BitVector::empty().
  ///
  bool empty() const { return Dense.empty(); }

  /// size - Returns the number of elements in the set.
  ///
  /// This is not the same as BitVector::size() which returns the size of the
  /// universe.
  ///
  size_type size() const { return Dense.size(); }

  /// clear - Clears the set.  This is a very fast constant time operation.
  ///
  void clear() {
    // Sparse does not need to be cleared, see find().
    Dense.clear();
  }

  /// findIndex - Find an element by its index.
  ///
  /// @param   Idx A valid index to find.
  /// @returns An iterator to the element identified by key, or end().
  ///
  iterator findIndex(unsigned Idx) {
    assert(Idx < Universe && "Key out of range");
    const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
    for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) {
      const unsigned FoundIdx = ValIndexOf(Dense[i]);
      assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?");
      if (Idx == FoundIdx)
        return begin() + i;
      // Stride is 0 when SparseT >= unsigned.  We don't need to loop.
      if (!Stride)
        break;
    }
    return end();
  }

  /// find - Find an element by its key.
  ///
  /// @param   Key A valid key to find.
  /// @returns An iterator to the element identified by key, or end().
  ///
  iterator find(const KeyT &Key) {
    return findIndex(KeyIndexOf(Key));
  }

  const_iterator find(const KeyT &Key) const {
    return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key));
  }

  /// count - Returns 1 if this set contains an element identified by Key,
  /// 0 otherwise.
  ///
  size_type count(const KeyT &Key) const {
    return find(Key) == end() ? 0 : 1;
  }

  /// insert - Attempts to insert a new element.
  ///
  /// If Val is successfully inserted, return (I, true), where I is an iterator
  /// pointing to the newly inserted element.
  ///
  /// If the set already contains an element with the same key as Val, return
  /// (I, false), where I is an iterator pointing to the existing element.
  ///
  /// Insertion invalidates all iterators.
  ///
  std::pair<iterator, bool> insert(const ValueT &Val) {
    unsigned Idx = ValIndexOf(Val);
    iterator I = findIndex(Idx);
    if (I != end())
      return std::make_pair(I, false);
    Sparse[Idx] = size();
    Dense.push_back(Val);
    return std::make_pair(end() - 1, true);
  }

  /// array subscript - If an element already exists with this key, return it.
  /// Otherwise, automatically construct a new value from Key, insert it,
  /// and return the newly inserted element.
  ValueT &operator[](const KeyT &Key) {
    return *insert(ValueT(Key)).first;
  }

  ValueT pop_back_val() {
    // Sparse does not need to be cleared, see find().
    return Dense.pop_back_val();
  }

  /// erase - Erases an existing element identified by a valid iterator.
  ///
  /// This invalidates all iterators, but erase() returns an iterator pointing
  /// to the next element.  This makes it possible to erase selected elements
  /// while iterating over the set:
  ///
  ///   for (SparseSet::iterator I = Set.begin(); I != Set.end();)
  ///     if (test(*I))
  ///       I = Set.erase(I);
  ///     else
  ///       ++I;
  ///
  /// Note that end() changes when elements are erased, unlike std::list.
  ///
  iterator erase(iterator I) {
    assert(unsigned(I - begin()) < size() && "Invalid iterator");
    if (I != end() - 1) {
      *I = Dense.back();
      unsigned BackIdx = ValIndexOf(Dense.back());
      assert(BackIdx < Universe && "Invalid key in set. Did object mutate?");
      Sparse[BackIdx] = I - begin();
    }
    // This depends on SmallVector::pop_back() not invalidating iterators.
    // std::vector::pop_back() doesn't give that guarantee.
    Dense.pop_back();
    return I;
  }

  /// erase - Erases an element identified by Key, if it exists.
  ///
  /// @param   Key The key identifying the element to erase.
  /// @returns True when an element was erased, false if no element was found.
  ///
  bool erase(const KeyT &Key) {
    iterator I = find(Key);
    if (I == end())
      return false;
    erase(I);
    return true;
  }
};

} // end namespace llvm

#endif // LLVM_ADT_SPARSESET_H