reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements class that drive and introspect deterministic finite-
// state automata (DFAs) as generated by TableGen's -gen-automata backend.
//
// For a description of how to define an automaton, see
// include/llvm/TableGen/Automaton.td.
//
// One important detail is that these deterministic automata are created from
// (potentially) nondeterministic definitions. Therefore a unique sequence of
// input symbols will produce one path through the DFA but multiple paths
// through the original NFA. An automaton by default only returns "accepted" or
// "not accepted", but frequently we want to analyze what NFA path was taken.
// Finding a path through the NFA states that results in a DFA state can help
// answer *what* the solution to a problem was, not just that there exists a
// solution.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_AUTOMATON_H
#define LLVM_SUPPORT_AUTOMATON_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include <deque>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>

namespace llvm {

using NfaPath = SmallVector<uint64_t, 4>;

/// Forward define the pair type used by the automata transition info tables.
///
/// Experimental results with large tables have shown a significant (multiple
/// orders of magnitude) parsing speedup by using a custom struct here with a
/// trivial constructor rather than std::pair<uint64_t, uint64_t>.
struct NfaStatePair {
  uint64_t FromDfaState, ToDfaState;

  bool operator<(const NfaStatePair &Other) const {
    return std::make_tuple(FromDfaState, ToDfaState) <
           std::make_tuple(Other.FromDfaState, Other.ToDfaState);
  }
};

namespace internal {
/// The internal class that maintains all possible paths through an NFA based
/// on a path through the DFA.
class NfaTranscriber {
private:
  /// Cached transition table. This is a table of NfaStatePairs that contains
  /// zero-terminated sequences pointed to by DFA transitions.
  ArrayRef<NfaStatePair> TransitionInfo;

  /// A simple linked-list of traversed states that can have a shared tail. The
  /// traversed path is stored in reverse order with the latest state as the
  /// head.
  struct PathSegment {
    uint64_t State;
    PathSegment *Tail;
  };

  /// We allocate segment objects frequently. Allocate them upfront and dispose
  /// at the end of a traversal rather than hammering the system allocator.
  SpecificBumpPtrAllocator<PathSegment> Allocator;

  /// Heads of each tracked path. These are not ordered.
  std::deque<PathSegment *> Heads;

  /// The returned paths. This is populated during getPaths.
  SmallVector<NfaPath, 4> Paths;

  /// Create a new segment and return it.
  PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
    PathSegment *P = Allocator.Allocate();
    *P = {State, Tail};
    return P;
  }

  /// Pairs defines a sequence of possible NFA transitions for a single DFA
  /// transition.
  void transition(ArrayRef<NfaStatePair> Pairs) {
    // Iterate over all existing heads. We will mutate the Heads deque during
    // iteration.
    unsigned NumHeads = Heads.size();
    for (unsigned I = 0; I < NumHeads; ++I) {
      PathSegment *Head = Heads[I];
      // The sequence of pairs is sorted. Select the set of pairs that
      // transition from the current head state.
      auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
      auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
      // For every transition from the current head state, add a new path
      // segment.
      for (; PI != PE; ++PI)
        if (PI->FromDfaState == Head->State)
          Heads.push_back(makePathSegment(PI->ToDfaState, Head));
    }
    // Now we've iterated over all the initial heads and added new ones,
    // dispose of the original heads.
    Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
  }

public:
  NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo)
      : TransitionInfo(TransitionInfo) {
    reset();
  }

  void reset() {
    Paths.clear();
    Heads.clear();
    Allocator.DestroyAll();
    // The initial NFA state is 0.
    Heads.push_back(makePathSegment(0ULL, nullptr));
  }

  void transition(unsigned TransitionInfoIdx) {
    unsigned EndIdx = TransitionInfoIdx;
    while (TransitionInfo[EndIdx].ToDfaState != 0)
      ++EndIdx;
    ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
                                 EndIdx - TransitionInfoIdx);
    transition(Pairs);
  }

  ArrayRef<NfaPath> getPaths() {
    Paths.clear();
    for (auto *Head : Heads) {
      NfaPath P;
      while (Head->State != 0) {
        P.push_back(Head->State);
        Head = Head->Tail;
      }
      std::reverse(P.begin(), P.end());
      Paths.push_back(std::move(P));
    }
    return Paths;
  }
};
} // namespace internal

/// A deterministic finite-state automaton. The automaton is defined in
/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
///
/// An automaton accepts a sequence of input tokens ("actions"). This class is
/// templated on the type of these actions.
template <typename ActionT> class Automaton {
  /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
  /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
  /// FIXME: This uses a std::map because ActionT can be a pair type including
  /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
  /// DenseMap here.
  /// This is a shared_ptr to allow very quick copy-construction of Automata; this
  /// state is immutable after construction so this is safe.
  using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
  std::shared_ptr<MapTy> M;
  /// An optional transcription object. This uses much more state than simply
  /// traversing the DFA for acceptance, so is heap allocated.
  std::shared_ptr<internal::NfaTranscriber> Transcriber;
  /// The initial DFA state is 1.
  uint64_t State = 1;
  /// True if we should transcribe and false if not (even if Transcriber is defined).
  bool Transcribe;

public:
  /// Create an automaton.
  /// \param Transitions The Transitions table as created by TableGen. Note that
  ///                    because the action type differs per automaton, the
  ///                    table type is templated as ArrayRef<InfoT>.
  /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
  ///
  /// Providing the TranscriptionTable argument as non-empty will enable the
  /// use of transcription, which analyzes the possible paths in the original
  /// NFA taken by the DFA. NOTE: This is substantially more work than simply
  /// driving the DFA, so unless you require the getPaths() method leave this
  /// empty.
  template <typename InfoT>
  Automaton(ArrayRef<InfoT> Transitions,
            ArrayRef<NfaStatePair> TranscriptionTable = {}) {
    if (!TranscriptionTable.empty())
      Transcriber =
          std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
    Transcribe = Transcriber != nullptr;
    M = std::make_shared<MapTy>();
    for (const auto &I : Transitions)
      // Greedily read and cache the transition table.
      M->emplace(std::make_pair(I.FromDfaState, I.Action),
                 std::make_pair(I.ToDfaState, I.InfoIdx));
  }
  Automaton(const Automaton &) = default;

  /// Reset the automaton to its initial state.
  void reset() {
    State = 1;
    if (Transcriber)
      Transcriber->reset();
  }

  /// Enable or disable transcription. Transcription is only available if
  /// TranscriptionTable was provided to the constructor.
  void enableTranscription(bool Enable = true) {
    assert(Transcriber &&
           "Transcription is only available if TranscriptionTable was provided "
           "to the Automaton constructor");
    Transcribe = Enable;
  }

  /// Transition the automaton based on input symbol A. Return true if the
  /// automaton transitioned to a valid state, false if the automaton
  /// transitioned to an invalid state.
  ///
  /// If this function returns false, all methods are undefined until reset() is
  /// called.
  bool add(const ActionT &A) {
    auto I = M->find({State, A});
    if (I == M->end())
      return false;
    if (Transcriber && Transcribe)
      Transcriber->transition(I->second.second);
    State = I->second.first;
    return true;
  }

  /// Return true if the automaton can be transitioned based on input symbol A.
  bool canAdd(const ActionT &A) {
    auto I = M->find({State, A});
    return I != M->end();
  }

  /// Obtain a set of possible paths through the input nondeterministic
  /// automaton that could be obtained from the sequence of input actions
  /// presented to this deterministic automaton.
  ArrayRef<NfaPath> getNfaPaths() {
    assert(Transcriber && Transcribe &&
           "Can only obtain NFA paths if transcribing!");
    return Transcriber->getPaths();
  }
};

} // namespace llvm

#endif // LLVM_SUPPORT_AUTOMATON_H