reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
//   - leaf nodes correspond to a token in the expanded token stream,
//   - tree nodes correspond to language grammar constructs.
//
// The tree is initially built from an AST. Each node of a newly built tree
// covers a continous subrange of expanded tokens (i.e. tokens after
// preprocessing), the specific tokens coverered are stored in the leaf nodes of
// a tree. A post-order traversal of a tree will visit leaf nodes in an order
// corresponding the original order of expanded tokens.
//
// This is still work in progress and highly experimental, we leave room for
// ourselves to completely change the design and/or implementation.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H

#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>

namespace clang {
namespace syntax {

/// A memory arena for syntax trees. Also tracks the underlying token buffers,
/// source manager, etc.
class Arena {
public:
  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
        TokenBuffer Tokens);

  const SourceManager &sourceManager() const { return SourceMgr; }
  const LangOptions &langOptions() const { return LangOpts; }

  const TokenBuffer &tokenBuffer() const;
  llvm::BumpPtrAllocator &allocator() { return Allocator; }

  /// Add \p Buffer to the underlying source manager, tokenize it and store the
  /// resulting tokens. Useful when there is a need to materialize tokens that
  /// were not written in user code.
  std::pair<FileID, llvm::ArrayRef<syntax::Token>>
  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);

private:
  SourceManager &SourceMgr;
  const LangOptions &LangOpts;
  TokenBuffer Tokens;
  /// IDs and storage for additional tokenized files.
  llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
  /// Keeps all the allocated nodes and their intermediate data structures.
  llvm::BumpPtrAllocator Allocator;
};

class Tree;
class TreeBuilder;
enum class NodeKind : uint16_t;
enum class NodeRole : uint8_t;

/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
/// a Tree (representing language constructrs).
class Node {
public:
  /// Newly created nodes are detached from a tree, parent and sibling links are
  /// set when the node is added as a child to another one.
  Node(NodeKind Kind);

  NodeKind kind() const { return static_cast<NodeKind>(Kind); }
  NodeRole role() const { return static_cast<NodeRole>(Role); }

  const Tree *parent() const { return Parent; }
  Tree *parent() { return Parent; }

  const Node *nextSibling() const { return NextSibling; }
  Node *nextSibling() { return NextSibling; }

  /// Dumps the structure of a subtree. For debugging and testing purposes.
  std::string dump(const Arena &A) const;
  /// Dumps the tokens forming this subtree.
  std::string dumpTokens(const Arena &A) const;

private:
  // Tree is allowed to change the Parent link and Role.
  friend class Tree;

  Tree *Parent;
  Node *NextSibling;
  unsigned Kind : 16;
  unsigned Role : 8;
};

/// A leaf node points to a single token inside the expanded token stream.
class Leaf final : public Node {
public:
  Leaf(const syntax::Token *T);
  static bool classof(const Node *N);

  const syntax::Token *token() const { return Tok; }

private:
  const syntax::Token *Tok;
};

/// A node that has children and represents a syntactic language construct.
class Tree : public Node {
public:
  using Node::Node;
  static bool classof(const Node *N);

  Node *firstChild() { return FirstChild; }
  const Node *firstChild() const { return FirstChild; }

protected:
  /// Find the first node with a corresponding role.
  syntax::Node *findChild(NodeRole R);

private:
  /// Prepend \p Child to the list of children and and sets the parent pointer.
  /// A very low-level operation that does not check any invariants, only used
  /// by TreeBuilder.
  /// EXPECTS: Role != NodeRoleDetached.
  void prependChildLowLevel(Node *Child, NodeRole Role);
  friend class TreeBuilder;

  Node *FirstChild = nullptr;
};

} // namespace syntax
} // namespace clang

#endif